1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file implements the LegalizerHelper class to legalize
10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
11 /// primary legalization.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
16 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
17 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
18 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/CodeGen/TargetFrameLowering.h"
21 #include "llvm/CodeGen/TargetInstrInfo.h"
22 #include "llvm/CodeGen/TargetLowering.h"
23 #include "llvm/CodeGen/TargetSubtargetInfo.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
27 
28 #define DEBUG_TYPE "legalizer"
29 
30 using namespace llvm;
31 using namespace LegalizeActions;
32 
33 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
34 ///
35 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
36 /// with any leftover piece as type \p LeftoverTy
37 ///
38 /// Returns -1 in the first element of the pair if the breakdown is not
39 /// satisfiable.
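///
/// For example, breaking s56 into s16 pieces gives {3, 1} with \p LeftoverTy
/// set to s8.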
40 static std::pair<int, int>
41 getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
42   assert(!LeftoverTy.isValid() && "this is an out argument");
43 
44   unsigned Size = OrigTy.getSizeInBits();
45   unsigned NarrowSize = NarrowTy.getSizeInBits();
46   unsigned NumParts = Size / NarrowSize;
47   unsigned LeftoverSize = Size - NumParts * NarrowSize;
48   assert(Size > NarrowSize);
49 
50   if (LeftoverSize == 0)
51     return {NumParts, 0};
52 
53   if (NarrowTy.isVector()) {
54     unsigned EltSize = OrigTy.getScalarSizeInBits();
55     if (LeftoverSize % EltSize != 0)
56       return {-1, -1};
57     LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
58   } else {
59     LeftoverTy = LLT::scalar(LeftoverSize);
60   }
61 
62   int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
63   return std::make_pair(NumParts, NumLeftover);
64 }
65 
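/// Return the largest common type that both \p OrigTy and \p TargetTy can be
/// decomposed into. For example, the GCD type of <4 x s32> and <6 x s32> is
/// <2 x s32>, and the GCD type of s48 and s32 is s16.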
66 static LLT getGCDType(LLT OrigTy, LLT TargetTy) {
67   if (OrigTy.isVector() && TargetTy.isVector()) {
68     assert(OrigTy.getElementType() == TargetTy.getElementType());
69     int GCD = greatestCommonDivisor(OrigTy.getNumElements(),
70                                     TargetTy.getNumElements());
71     return LLT::scalarOrVector(GCD, OrigTy.getElementType());
72   }
73 
74   if (OrigTy.isVector() && !TargetTy.isVector()) {
75     assert(OrigTy.getElementType() == TargetTy);
76     return TargetTy;
77   }
78 
79   assert(!OrigTy.isVector() && !TargetTy.isVector() &&
80          "GCD type of vector and scalar not implemented");
81 
82   int GCD = greatestCommonDivisor(OrigTy.getSizeInBits(),
83                                   TargetTy.getSizeInBits());
84   return LLT::scalar(GCD);
85 }
86 
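/// Return the smallest scalar type that both \p Ty0 and \p Ty1 evenly divide.
/// For example, the LCM type of s4 and s6 is s12.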
87 static LLT getLCMType(LLT Ty0, LLT Ty1) {
88   assert(Ty0.isScalar() && Ty1.isScalar() && "not yet handled");
89   unsigned Mul = Ty0.getSizeInBits() * Ty1.getSizeInBits();
90   int GCDSize = greatestCommonDivisor(Ty0.getSizeInBits(),
91                                       Ty1.getSizeInBits());
92   return LLT::scalar(Mul / GCDSize);
93 }
94 
95 LegalizerHelper::LegalizerHelper(MachineFunction &MF,
96                                  GISelChangeObserver &Observer,
97                                  MachineIRBuilder &Builder)
98     : MIRBuilder(Builder), MRI(MF.getRegInfo()),
99       LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
100   MIRBuilder.setMF(MF);
101   MIRBuilder.setChangeObserver(Observer);
102 }
103 
104 LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
105                                  GISelChangeObserver &Observer,
106                                  MachineIRBuilder &B)
107     : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
108   MIRBuilder.setMF(MF);
109   MIRBuilder.setChangeObserver(Observer);
110 }

LegalizerHelper::LegalizeResult
112 LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
113   LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));
114 
115   if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
116       MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
117     return LI.legalizeIntrinsic(MI, MRI, MIRBuilder) ? Legalized
118                                                      : UnableToLegalize;
119   auto Step = LI.getAction(MI, MRI);
120   switch (Step.Action) {
121   case Legal:
122     LLVM_DEBUG(dbgs() << ".. Already legal\n");
123     return AlreadyLegal;
124   case Libcall:
125     LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
126     return libcall(MI);
127   case NarrowScalar:
128     LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
129     return narrowScalar(MI, Step.TypeIdx, Step.NewType);
130   case WidenScalar:
131     LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
132     return widenScalar(MI, Step.TypeIdx, Step.NewType);
133   case Lower:
134     LLVM_DEBUG(dbgs() << ".. Lower\n");
135     return lower(MI, Step.TypeIdx, Step.NewType);
136   case FewerElements:
137     LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
138     return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
139   case MoreElements:
140     LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
141     return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
142   case Custom:
143     LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
144     return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
145                                                             : UnableToLegalize;
146   default:
147     LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
148     return UnableToLegalize;
149   }
150 }
151 
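/// Split \p Reg into \p NumParts pieces of type \p Ty using a single
/// G_UNMERGE_VALUES, appending the new registers to \p VRegs. \p Ty must
/// evenly cover the type of \p Reg.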
152 void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
153                                    SmallVectorImpl<Register> &VRegs) {
154   for (int i = 0; i < NumParts; ++i)
155     VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
156   MIRBuilder.buildUnmerge(VRegs, Reg);
157 }
158 
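/// Overload that also handles irregular breakdowns: \p MainTy sized pieces
/// are appended to \p VRegs, and any remaining bits are extracted as
/// \p LeftoverTy pieces into \p LeftoverRegs. Returns false if \p RegTy
/// cannot be split this way.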
159 bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
160                                    LLT MainTy, LLT &LeftoverTy,
161                                    SmallVectorImpl<Register> &VRegs,
162                                    SmallVectorImpl<Register> &LeftoverRegs) {
163   assert(!LeftoverTy.isValid() && "this is an out argument");
164 
165   unsigned RegSize = RegTy.getSizeInBits();
166   unsigned MainSize = MainTy.getSizeInBits();
167   unsigned NumParts = RegSize / MainSize;
168   unsigned LeftoverSize = RegSize - NumParts * MainSize;
169 
170   // Use an unmerge when possible.
171   if (LeftoverSize == 0) {
172     for (unsigned I = 0; I < NumParts; ++I)
173       VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
174     MIRBuilder.buildUnmerge(VRegs, Reg);
175     return true;
176   }
177 
178   if (MainTy.isVector()) {
179     unsigned EltSize = MainTy.getScalarSizeInBits();
180     if (LeftoverSize % EltSize != 0)
181       return false;
182     LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
183   } else {
184     LeftoverTy = LLT::scalar(LeftoverSize);
185   }
186 
187   // For irregular sizes, extract the individual parts.
188   for (unsigned I = 0; I != NumParts; ++I) {
189     Register NewReg = MRI.createGenericVirtualRegister(MainTy);
190     VRegs.push_back(NewReg);
191     MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
192   }
193 
194   for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
195        Offset += LeftoverSize) {
196     Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
197     LeftoverRegs.push_back(NewReg);
198     MIRBuilder.buildExtract(NewReg, Reg, Offset);
199   }
200 
201   return true;
202 }
203 
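/// Reassemble \p DstReg of type \p ResultTy from the \p PartTy sized pieces in
/// \p PartRegs plus any \p LeftoverTy sized pieces in \p LeftoverRegs. With no
/// leftover pieces this is a plain merge, concat_vectors or build_vector;
/// otherwise the pieces are inserted one at a time into an implicit_def of the
/// result type.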
204 void LegalizerHelper::insertParts(Register DstReg,
205                                   LLT ResultTy, LLT PartTy,
206                                   ArrayRef<Register> PartRegs,
207                                   LLT LeftoverTy,
208                                   ArrayRef<Register> LeftoverRegs) {
209   if (!LeftoverTy.isValid()) {
210     assert(LeftoverRegs.empty());
211 
212     if (!ResultTy.isVector()) {
213       MIRBuilder.buildMerge(DstReg, PartRegs);
214       return;
215     }
216 
217     if (PartTy.isVector())
218       MIRBuilder.buildConcatVectors(DstReg, PartRegs);
219     else
220       MIRBuilder.buildBuildVector(DstReg, PartRegs);
221     return;
222   }
223 
224   unsigned PartSize = PartTy.getSizeInBits();
225   unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();
226 
227   Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
228   MIRBuilder.buildUndef(CurResultReg);
229 
230   unsigned Offset = 0;
231   for (Register PartReg : PartRegs) {
232     Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
233     MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
234     CurResultReg = NewResultReg;
235     Offset += PartSize;
236   }
237 
238   for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
239     // Use the original output register for the final insert to avoid a copy.
240     Register NewResultReg = (I + 1 == E) ?
241       DstReg : MRI.createGenericVirtualRegister(ResultTy);
242 
243     MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
244     CurResultReg = NewResultReg;
245     Offset += LeftoverPartSize;
246   }
247 }
248 
249 /// Return the result registers of G_UNMERGE_VALUES \p MI in \p Regs
250 static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
251                               const MachineInstr &MI) {
252   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
253 
254   const int NumResults = MI.getNumOperands() - 1;
255   Regs.resize(NumResults);
256   for (int I = 0; I != NumResults; ++I)
257     Regs[I] = MI.getOperand(I).getReg();
258 }
259 
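/// Split \p SrcReg into pieces of the GCD type of \p DstTy, \p NarrowTy and
/// the type of \p SrcReg, appending them to \p Parts, and return that GCD
/// type. If the source is already the GCD type, it is used directly.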
260 LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
261                                     LLT NarrowTy, Register SrcReg) {
262   LLT SrcTy = MRI.getType(SrcReg);
263 
264   LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
265   if (SrcTy == GCDTy) {
266     // If the source already evenly divides the result type, we don't need to do
267     // anything.
268     Parts.push_back(SrcReg);
269   } else {
270     // Need to split into common type sized pieces.
271     auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
272     getUnmergeResults(Parts, *Unmerge);
273   }
274 
275   return GCDTy;
276 }
277 
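/// Merge the \p GCDTy sized pieces in \p VRegs into \p DstReg. The pieces are
/// first combined into \p NarrowTy sized merges covering the LCM type of the
/// destination and \p NarrowTy, padding with \p PadStrategy (G_ANYEXT, G_ZEXT
/// or G_SEXT) when the pieces don't cover it, and the result is truncated to
/// the destination type if necessary.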
278 void LegalizerHelper::buildLCMMerge(Register DstReg, LLT NarrowTy, LLT GCDTy,
279                                     SmallVectorImpl<Register> &VRegs,
280                                     unsigned PadStrategy) {
281   LLT DstTy = MRI.getType(DstReg);
282   LLT LCMTy = getLCMType(DstTy, NarrowTy);
283 
284   int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
285   int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
286   int NumOrigSrc = VRegs.size();
287 
288   Register PadReg;
289 
290   // Get a value we can use to pad the source value if the sources won't evenly
291   // cover the result type.
292   if (NumOrigSrc < NumParts * NumSubParts) {
293     if (PadStrategy == TargetOpcode::G_ZEXT)
294       PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
295     else if (PadStrategy == TargetOpcode::G_ANYEXT)
296       PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
297     else {
298       assert(PadStrategy == TargetOpcode::G_SEXT);
299 
      // Replicate the sign bit of the highest piece to fill the padding.
301       auto ShiftAmt =
302         MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
303       PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
304     }
305   }
306 
307   // Registers for the final merge to be produced.
308   SmallVector<Register, 4> Remerge;
309   Remerge.resize(NumParts);
310 
311   // Registers needed for intermediate merges, which will be merged into a
312   // source for Remerge.
313   SmallVector<Register, 4> SubMerge;
314   SubMerge.resize(NumSubParts);
315 
316   // Once we've fully read off the end of the original source bits, we can reuse
317   // the same high bits for remaining padding elements.
318   Register AllPadReg;
319 
320   // Build merges to the LCM type to cover the original result type.
321   for (int I = 0; I != NumParts; ++I) {
322     bool AllMergePartsArePadding = true;
323 
324     // Build the requested merges to the requested type.
325     for (int J = 0; J != NumSubParts; ++J) {
326       int Idx = I * NumSubParts + J;
327       if (Idx >= NumOrigSrc) {
328         SubMerge[J] = PadReg;
329         continue;
330       }
331 
332       SubMerge[J] = VRegs[Idx];
333 
334       // There are meaningful bits here we can't reuse later.
335       AllMergePartsArePadding = false;
336     }
337 
338     // If we've filled up a complete piece with padding bits, we can directly
339     // emit the natural sized constant if applicable, rather than a merge of
340     // smaller constants.
341     if (AllMergePartsArePadding && !AllPadReg) {
342       if (PadStrategy == TargetOpcode::G_ANYEXT)
343         AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
344       else if (PadStrategy == TargetOpcode::G_ZEXT)
345         AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
346 
347       // If this is a sign extension, we can't materialize a trivial constant
348       // with the right type and have to produce a merge.
349     }
350 
351     if (AllPadReg) {
352       // Avoid creating additional instructions if we're just adding additional
353       // copies of padding bits.
354       Remerge[I] = AllPadReg;
355       continue;
356     }
357 
358     if (NumSubParts == 1)
359       Remerge[I] = SubMerge[0];
360     else
361       Remerge[I] = MIRBuilder.buildMerge(NarrowTy, SubMerge).getReg(0);
362 
363     // In the sign extend padding case, re-use the first all-signbit merge.
364     if (AllMergePartsArePadding && !AllPadReg)
365       AllPadReg = Remerge[I];
366   }
367 
368   // Create the merge to the widened source, and extract the relevant bits into
369   // the result.
370   if (DstTy == LCMTy)
371     MIRBuilder.buildMerge(DstReg, Remerge);
372   else
373     MIRBuilder.buildTrunc(DstReg, MIRBuilder.buildMerge(LCMTy, Remerge));
374 }
375 
376 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
377   switch (Opcode) {
378   case TargetOpcode::G_SDIV:
379     assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
380     switch (Size) {
381     case 32:
382       return RTLIB::SDIV_I32;
383     case 64:
384       return RTLIB::SDIV_I64;
385     case 128:
386       return RTLIB::SDIV_I128;
387     default:
388       llvm_unreachable("unexpected size");
389     }
390   case TargetOpcode::G_UDIV:
391     assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
392     switch (Size) {
393     case 32:
394       return RTLIB::UDIV_I32;
395     case 64:
396       return RTLIB::UDIV_I64;
397     case 128:
398       return RTLIB::UDIV_I128;
399     default:
400       llvm_unreachable("unexpected size");
401     }
402   case TargetOpcode::G_SREM:
403     assert((Size == 32 || Size == 64) && "Unsupported size");
404     return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
405   case TargetOpcode::G_UREM:
406     assert((Size == 32 || Size == 64) && "Unsupported size");
407     return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32;
408   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
409     assert(Size == 32 && "Unsupported size");
410     return RTLIB::CTLZ_I32;
411   case TargetOpcode::G_FADD:
412     assert((Size == 32 || Size == 64) && "Unsupported size");
413     return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
414   case TargetOpcode::G_FSUB:
415     assert((Size == 32 || Size == 64) && "Unsupported size");
416     return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
417   case TargetOpcode::G_FMUL:
418     assert((Size == 32 || Size == 64) && "Unsupported size");
419     return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
420   case TargetOpcode::G_FDIV:
421     assert((Size == 32 || Size == 64) && "Unsupported size");
422     return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
423   case TargetOpcode::G_FEXP:
424     assert((Size == 32 || Size == 64) && "Unsupported size");
425     return Size == 64 ? RTLIB::EXP_F64 : RTLIB::EXP_F32;
426   case TargetOpcode::G_FEXP2:
427     assert((Size == 32 || Size == 64) && "Unsupported size");
428     return Size == 64 ? RTLIB::EXP2_F64 : RTLIB::EXP2_F32;
429   case TargetOpcode::G_FREM:
430     return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
431   case TargetOpcode::G_FPOW:
432     return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
433   case TargetOpcode::G_FMA:
434     assert((Size == 32 || Size == 64) && "Unsupported size");
435     return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
436   case TargetOpcode::G_FSIN:
437     assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
438     return Size == 128 ? RTLIB::SIN_F128
439                        : Size == 64 ? RTLIB::SIN_F64 : RTLIB::SIN_F32;
440   case TargetOpcode::G_FCOS:
441     assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
442     return Size == 128 ? RTLIB::COS_F128
443                        : Size == 64 ? RTLIB::COS_F64 : RTLIB::COS_F32;
444   case TargetOpcode::G_FLOG10:
445     assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
446     return Size == 128 ? RTLIB::LOG10_F128
447                        : Size == 64 ? RTLIB::LOG10_F64 : RTLIB::LOG10_F32;
448   case TargetOpcode::G_FLOG:
449     assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
450     return Size == 128 ? RTLIB::LOG_F128
451                        : Size == 64 ? RTLIB::LOG_F64 : RTLIB::LOG_F32;
452   case TargetOpcode::G_FLOG2:
453     assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
454     return Size == 128 ? RTLIB::LOG2_F128
455                        : Size == 64 ? RTLIB::LOG2_F64 : RTLIB::LOG2_F32;
456   case TargetOpcode::G_FCEIL:
457     assert((Size == 32 || Size == 64) && "Unsupported size");
458     return Size == 64 ? RTLIB::CEIL_F64 : RTLIB::CEIL_F32;
459   case TargetOpcode::G_FFLOOR:
460     assert((Size == 32 || Size == 64) && "Unsupported size");
461     return Size == 64 ? RTLIB::FLOOR_F64 : RTLIB::FLOOR_F32;
462   }
463   llvm_unreachable("Unknown libcall function");
464 }
465 
466 /// True if an instruction is in tail position in its caller. Intended for
467 /// legalizing libcalls as tail calls when possible.
468 static bool isLibCallInTailPosition(MachineInstr &MI) {
469   const Function &F = MI.getParent()->getParent()->getFunction();
470 
471   // Conservatively require the attributes of the call to match those of
472   // the return. Ignore NoAlias and NonNull because they don't affect the
473   // call sequence.
474   AttributeList CallerAttrs = F.getAttributes();
475   if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
476           .removeAttribute(Attribute::NoAlias)
477           .removeAttribute(Attribute::NonNull)
478           .hasAttributes())
479     return false;
480 
481   // It's not safe to eliminate the sign / zero extension of the return value.
482   if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
483       CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
484     return false;
485 
486   // Only tail call if the following instruction is a standard return.
487   auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
488   MachineInstr *Next = MI.getNextNode();
489   if (!Next || TII.isTailCall(*Next) || !Next->isReturn())
490     return false;
491 
492   return true;
493 }
494 
495 LegalizerHelper::LegalizeResult
496 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
497                     const CallLowering::ArgInfo &Result,
498                     ArrayRef<CallLowering::ArgInfo> Args) {
499   auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
500   auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
501   const char *Name = TLI.getLibcallName(Libcall);
502 
503   CallLowering::CallLoweringInfo Info;
504   Info.CallConv = TLI.getLibcallCallingConv(Libcall);
505   Info.Callee = MachineOperand::CreateES(Name);
506   Info.OrigRet = Result;
507   std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
508   if (!CLI.lowerCall(MIRBuilder, Info))
509     return LegalizerHelper::UnableToLegalize;
510 
511   return LegalizerHelper::Legalized;
512 }
513 
514 // Useful for libcalls where all operands have the same type.
515 static LegalizerHelper::LegalizeResult
516 simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
517               Type *OpType) {
518   auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
519 
520   SmallVector<CallLowering::ArgInfo, 3> Args;
521   for (unsigned i = 1; i < MI.getNumOperands(); i++)
522     Args.push_back({MI.getOperand(i).getReg(), OpType});
523   return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
524                        Args);
525 }
526 
527 LegalizerHelper::LegalizeResult
528 llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
529                        MachineInstr &MI) {
530   assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
531   auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
532 
533   SmallVector<CallLowering::ArgInfo, 3> Args;
534   // Add all the args, except for the last which is an imm denoting 'tail'.
535   for (unsigned i = 1; i < MI.getNumOperands() - 1; i++) {
536     Register Reg = MI.getOperand(i).getReg();
537 
    // Need to derive an IR type for call lowering.
539     LLT OpLLT = MRI.getType(Reg);
540     Type *OpTy = nullptr;
541     if (OpLLT.isPointer())
542       OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
543     else
544       OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
545     Args.push_back({Reg, OpTy});
546   }
547 
548   auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
549   auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
550   Intrinsic::ID ID = MI.getOperand(0).getIntrinsicID();
551   RTLIB::Libcall RTLibcall;
552   switch (ID) {
553   case Intrinsic::memcpy:
554     RTLibcall = RTLIB::MEMCPY;
555     break;
556   case Intrinsic::memset:
557     RTLibcall = RTLIB::MEMSET;
558     break;
559   case Intrinsic::memmove:
560     RTLibcall = RTLIB::MEMMOVE;
561     break;
562   default:
563     return LegalizerHelper::UnableToLegalize;
564   }
565   const char *Name = TLI.getLibcallName(RTLibcall);
566 
567   MIRBuilder.setInstr(MI);
568 
569   CallLowering::CallLoweringInfo Info;
570   Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
571   Info.Callee = MachineOperand::CreateES(Name);
572   Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
573   Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() == 1 &&
574                     isLibCallInTailPosition(MI);
575 
576   std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
577   if (!CLI.lowerCall(MIRBuilder, Info))
578     return LegalizerHelper::UnableToLegalize;
579 
580   if (Info.LoweredTailCall) {
581     assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
582     // We must have a return following the call to get past
583     // isLibCallInTailPosition.
584     assert(MI.getNextNode() && MI.getNextNode()->isReturn() &&
585            "Expected instr following MI to be a return?");
586 
587     // We lowered a tail call, so the call is now the return from the block.
588     // Delete the old return.
589     MI.getNextNode()->eraseFromParent();
590   }
591 
592   return LegalizerHelper::Legalized;
593 }
594 
595 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
596                                        Type *FromType) {
597   auto ToMVT = MVT::getVT(ToType);
598   auto FromMVT = MVT::getVT(FromType);
599 
600   switch (Opcode) {
601   case TargetOpcode::G_FPEXT:
602     return RTLIB::getFPEXT(FromMVT, ToMVT);
603   case TargetOpcode::G_FPTRUNC:
604     return RTLIB::getFPROUND(FromMVT, ToMVT);
605   case TargetOpcode::G_FPTOSI:
606     return RTLIB::getFPTOSINT(FromMVT, ToMVT);
607   case TargetOpcode::G_FPTOUI:
608     return RTLIB::getFPTOUINT(FromMVT, ToMVT);
609   case TargetOpcode::G_SITOFP:
610     return RTLIB::getSINTTOFP(FromMVT, ToMVT);
611   case TargetOpcode::G_UITOFP:
612     return RTLIB::getUINTTOFP(FromMVT, ToMVT);
613   }
614   llvm_unreachable("Unsupported libcall function");
615 }
616 
617 static LegalizerHelper::LegalizeResult
618 conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
619                   Type *FromType) {
620   RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
621   return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
622                        {{MI.getOperand(1).getReg(), FromType}});
623 }
624 
625 LegalizerHelper::LegalizeResult
626 LegalizerHelper::libcall(MachineInstr &MI) {
627   LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
628   unsigned Size = LLTy.getSizeInBits();
629   auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
630 
631   MIRBuilder.setInstr(MI);
632 
633   switch (MI.getOpcode()) {
634   default:
635     return UnableToLegalize;
636   case TargetOpcode::G_SDIV:
637   case TargetOpcode::G_UDIV:
638   case TargetOpcode::G_SREM:
639   case TargetOpcode::G_UREM:
640   case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
641     Type *HLTy = IntegerType::get(Ctx, Size);
642     auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
643     if (Status != Legalized)
644       return Status;
645     break;
646   }
647   case TargetOpcode::G_FADD:
648   case TargetOpcode::G_FSUB:
649   case TargetOpcode::G_FMUL:
650   case TargetOpcode::G_FDIV:
651   case TargetOpcode::G_FMA:
652   case TargetOpcode::G_FPOW:
653   case TargetOpcode::G_FREM:
654   case TargetOpcode::G_FCOS:
655   case TargetOpcode::G_FSIN:
656   case TargetOpcode::G_FLOG10:
657   case TargetOpcode::G_FLOG:
658   case TargetOpcode::G_FLOG2:
659   case TargetOpcode::G_FEXP:
660   case TargetOpcode::G_FEXP2:
661   case TargetOpcode::G_FCEIL:
662   case TargetOpcode::G_FFLOOR: {
663     if (Size > 64) {
664       LLVM_DEBUG(dbgs() << "Size " << Size << " too large to legalize.\n");
665       return UnableToLegalize;
666     }
667     Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
668     auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
669     if (Status != Legalized)
670       return Status;
671     break;
672   }
673   case TargetOpcode::G_FPEXT: {
674     // FIXME: Support other floating point types (half, fp128 etc)
675     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
676     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
677     if (ToSize != 64 || FromSize != 32)
678       return UnableToLegalize;
679     LegalizeResult Status = conversionLibcall(
680         MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx));
681     if (Status != Legalized)
682       return Status;
683     break;
684   }
685   case TargetOpcode::G_FPTRUNC: {
686     // FIXME: Support other floating point types (half, fp128 etc)
687     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
688     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
689     if (ToSize != 32 || FromSize != 64)
690       return UnableToLegalize;
691     LegalizeResult Status = conversionLibcall(
692         MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx));
693     if (Status != Legalized)
694       return Status;
695     break;
696   }
697   case TargetOpcode::G_FPTOSI:
698   case TargetOpcode::G_FPTOUI: {
699     // FIXME: Support other types
700     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
701     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
702     if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
703       return UnableToLegalize;
704     LegalizeResult Status = conversionLibcall(
705         MI, MIRBuilder,
706         ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
707         FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
708     if (Status != Legalized)
709       return Status;
710     break;
711   }
712   case TargetOpcode::G_SITOFP:
713   case TargetOpcode::G_UITOFP: {
714     // FIXME: Support other types
715     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
716     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
717     if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
718       return UnableToLegalize;
719     LegalizeResult Status = conversionLibcall(
720         MI, MIRBuilder,
721         ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
722         FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
723     if (Status != Legalized)
724       return Status;
725     break;
726   }
727   }
728 
729   MI.eraseFromParent();
730   return Legalized;
731 }
732 
733 LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
734                                                               unsigned TypeIdx,
735                                                               LLT NarrowTy) {
736   MIRBuilder.setInstr(MI);
737 
738   uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
739   uint64_t NarrowSize = NarrowTy.getSizeInBits();
740 
741   switch (MI.getOpcode()) {
742   default:
743     return UnableToLegalize;
744   case TargetOpcode::G_IMPLICIT_DEF: {
745     // FIXME: add support for when SizeOp0 isn't an exact multiple of
746     // NarrowSize.
747     if (SizeOp0 % NarrowSize != 0)
748       return UnableToLegalize;
749     int NumParts = SizeOp0 / NarrowSize;
750 
751     SmallVector<Register, 2> DstRegs;
752     for (int i = 0; i < NumParts; ++i)
753       DstRegs.push_back(
754           MIRBuilder.buildUndef(NarrowTy).getReg(0));
755 
756     Register DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
758       MIRBuilder.buildBuildVector(DstReg, DstRegs);
759     else
760       MIRBuilder.buildMerge(DstReg, DstRegs);
761     MI.eraseFromParent();
762     return Legalized;
763   }
764   case TargetOpcode::G_CONSTANT: {
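    // Split the constant into NarrowTy sized constants holding successive
    // slices of the value, plus a leftover constant for any remaining bits,
    // then reassemble with insertParts. For example, narrowing an s64
    // constant to s32 produces two s32 G_CONSTANTs (low and high halves)
    // merged back into an s64 value.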
765     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
766     const APInt &Val = MI.getOperand(1).getCImm()->getValue();
767     unsigned TotalSize = Ty.getSizeInBits();
768     unsigned NarrowSize = NarrowTy.getSizeInBits();
769     int NumParts = TotalSize / NarrowSize;
770 
771     SmallVector<Register, 4> PartRegs;
772     for (int I = 0; I != NumParts; ++I) {
773       unsigned Offset = I * NarrowSize;
774       auto K = MIRBuilder.buildConstant(NarrowTy,
775                                         Val.lshr(Offset).trunc(NarrowSize));
776       PartRegs.push_back(K.getReg(0));
777     }
778 
779     LLT LeftoverTy;
780     unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
781     SmallVector<Register, 1> LeftoverRegs;
782     if (LeftoverBits != 0) {
783       LeftoverTy = LLT::scalar(LeftoverBits);
784       auto K = MIRBuilder.buildConstant(
785         LeftoverTy,
786         Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
787       LeftoverRegs.push_back(K.getReg(0));
788     }
789 
790     insertParts(MI.getOperand(0).getReg(),
791                 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
792 
793     MI.eraseFromParent();
794     return Legalized;
795   }
796   case TargetOpcode::G_SEXT:
797   case TargetOpcode::G_ZEXT:
798   case TargetOpcode::G_ANYEXT:
799     return narrowScalarExt(MI, TypeIdx, NarrowTy);
800   case TargetOpcode::G_TRUNC: {
801     if (TypeIdx != 1)
802       return UnableToLegalize;
803 
804     uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
805     if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
806       LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
807       return UnableToLegalize;
808     }
809 
810     auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
811     MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
812     MI.eraseFromParent();
813     return Legalized;
814   }
815 
816   case TargetOpcode::G_ADD: {
817     // FIXME: add support for when SizeOp0 isn't an exact multiple of
818     // NarrowSize.
819     if (SizeOp0 % NarrowSize != 0)
820       return UnableToLegalize;
    // Expand in terms of carry-setting/consuming G_UADDO/G_UADDE instructions.
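    // For example, narrowing an s64 add to s32 produces (register names are
    // illustrative):
    //   %lo1:_(s32), %hi1:_(s32) = G_UNMERGE_VALUES %src1:_(s64)
    //   %lo2:_(s32), %hi2:_(s32) = G_UNMERGE_VALUES %src2:_(s64)
    //   %lo:_(s32), %carry:_(s1) = G_UADDO %lo1, %lo2
    //   %hi:_(s32), %c2:_(s1) = G_UADDE %hi1, %hi2, %carry
    //   %dst:_(s64) = G_MERGE_VALUES %lo, %hi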
822     int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
823 
824     SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
825     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
826     extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
827 
828     Register CarryIn;
829     for (int i = 0; i < NumParts; ++i) {
830       Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
831       Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
832 
833       if (i == 0)
834         MIRBuilder.buildUAddo(DstReg, CarryOut, Src1Regs[i], Src2Regs[i]);
835       else {
836         MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
837                               Src2Regs[i], CarryIn);
838       }
839 
840       DstRegs.push_back(DstReg);
841       CarryIn = CarryOut;
842     }
843     Register DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
845       MIRBuilder.buildBuildVector(DstReg, DstRegs);
846     else
847       MIRBuilder.buildMerge(DstReg, DstRegs);
848     MI.eraseFromParent();
849     return Legalized;
850   }
851   case TargetOpcode::G_SUB: {
852     // FIXME: add support for when SizeOp0 isn't an exact multiple of
853     // NarrowSize.
854     if (SizeOp0 % NarrowSize != 0)
855       return UnableToLegalize;
856 
857     int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
858 
859     SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
860     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
861     extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
862 
863     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
864     Register BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
865     MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut},
866                           {Src1Regs[0], Src2Regs[0]});
867     DstRegs.push_back(DstReg);
868     Register BorrowIn = BorrowOut;
869     for (int i = 1; i < NumParts; ++i) {
870       DstReg = MRI.createGenericVirtualRegister(NarrowTy);
871       BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
872 
873       MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut},
874                             {Src1Regs[i], Src2Regs[i], BorrowIn});
875 
876       DstRegs.push_back(DstReg);
877       BorrowIn = BorrowOut;
878     }
879     MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
880     MI.eraseFromParent();
881     return Legalized;
882   }
883   case TargetOpcode::G_MUL:
884   case TargetOpcode::G_UMULH:
885     return narrowScalarMul(MI, NarrowTy);
886   case TargetOpcode::G_EXTRACT:
887     return narrowScalarExtract(MI, TypeIdx, NarrowTy);
888   case TargetOpcode::G_INSERT:
889     return narrowScalarInsert(MI, TypeIdx, NarrowTy);
890   case TargetOpcode::G_LOAD: {
891     const auto &MMO = **MI.memoperands_begin();
892     Register DstReg = MI.getOperand(0).getReg();
893     LLT DstTy = MRI.getType(DstReg);
894     if (DstTy.isVector())
895       return UnableToLegalize;
896 
897     if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
898       Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
899       auto &MMO = **MI.memoperands_begin();
900       MIRBuilder.buildLoad(TmpReg, MI.getOperand(1), MMO);
901       MIRBuilder.buildAnyExt(DstReg, TmpReg);
902       MI.eraseFromParent();
903       return Legalized;
904     }
905 
906     return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
907   }
908   case TargetOpcode::G_ZEXTLOAD:
909   case TargetOpcode::G_SEXTLOAD: {
910     bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
911     Register DstReg = MI.getOperand(0).getReg();
912     Register PtrReg = MI.getOperand(1).getReg();
913 
914     Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
915     auto &MMO = **MI.memoperands_begin();
916     if (MMO.getSizeInBits() == NarrowSize) {
917       MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
918     } else {
919       MIRBuilder.buildLoadInstr(MI.getOpcode(), TmpReg, PtrReg, MMO);
920     }
921 
922     if (ZExt)
923       MIRBuilder.buildZExt(DstReg, TmpReg);
924     else
925       MIRBuilder.buildSExt(DstReg, TmpReg);
926 
927     MI.eraseFromParent();
928     return Legalized;
929   }
930   case TargetOpcode::G_STORE: {
931     const auto &MMO = **MI.memoperands_begin();
932 
933     Register SrcReg = MI.getOperand(0).getReg();
934     LLT SrcTy = MRI.getType(SrcReg);
935     if (SrcTy.isVector())
936       return UnableToLegalize;
937 
938     int NumParts = SizeOp0 / NarrowSize;
939     unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
940     unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
941     if (SrcTy.isVector() && LeftoverBits != 0)
942       return UnableToLegalize;
943 
944     if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
945       Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
946       auto &MMO = **MI.memoperands_begin();
947       MIRBuilder.buildTrunc(TmpReg, SrcReg);
948       MIRBuilder.buildStore(TmpReg, MI.getOperand(1), MMO);
949       MI.eraseFromParent();
950       return Legalized;
951     }
952 
953     return reduceLoadStoreWidth(MI, 0, NarrowTy);
954   }
955   case TargetOpcode::G_SELECT:
956     return narrowScalarSelect(MI, TypeIdx, NarrowTy);
957   case TargetOpcode::G_AND:
958   case TargetOpcode::G_OR:
959   case TargetOpcode::G_XOR: {
960     // Legalize bitwise operation:
961     // A = BinOp<Ty> B, C
962     // into:
963     // B1, ..., BN = G_UNMERGE_VALUES B
964     // C1, ..., CN = G_UNMERGE_VALUES C
    // A1 = BinOp<Ty/N> B1, C1
966     // ...
967     // AN = BinOp<Ty/N> BN, CN
968     // A = G_MERGE_VALUES A1, ..., AN
969     return narrowScalarBasic(MI, TypeIdx, NarrowTy);
970   }
971   case TargetOpcode::G_SHL:
972   case TargetOpcode::G_LSHR:
973   case TargetOpcode::G_ASHR:
974     return narrowScalarShift(MI, TypeIdx, NarrowTy);
975   case TargetOpcode::G_CTLZ:
976   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
977   case TargetOpcode::G_CTTZ:
978   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
979   case TargetOpcode::G_CTPOP:
980     if (TypeIdx != 0)
981       return UnableToLegalize; // TODO
982 
983     Observer.changingInstr(MI);
984     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
985     Observer.changedInstr(MI);
986     return Legalized;
987   case TargetOpcode::G_INTTOPTR:
988     if (TypeIdx != 1)
989       return UnableToLegalize;
990 
991     Observer.changingInstr(MI);
992     narrowScalarSrc(MI, NarrowTy, 1);
993     Observer.changedInstr(MI);
994     return Legalized;
995   case TargetOpcode::G_PTRTOINT:
996     if (TypeIdx != 0)
997       return UnableToLegalize;
998 
999     Observer.changingInstr(MI);
1000     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1001     Observer.changedInstr(MI);
1002     return Legalized;
1003   case TargetOpcode::G_PHI: {
1004     unsigned NumParts = SizeOp0 / NarrowSize;
1005     SmallVector<Register, 2> DstRegs;
1006     SmallVector<SmallVector<Register, 2>, 2> SrcRegs;
1007     DstRegs.resize(NumParts);
1008     SrcRegs.resize(MI.getNumOperands() / 2);
1009     Observer.changingInstr(MI);
1010     for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1011       MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1012       MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
1013       extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1014                    SrcRegs[i / 2]);
1015     }
1016     MachineBasicBlock &MBB = *MI.getParent();
1017     MIRBuilder.setInsertPt(MBB, MI);
1018     for (unsigned i = 0; i < NumParts; ++i) {
1019       DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1020       MachineInstrBuilder MIB =
1021           MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1022       for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1023         MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1024     }
1025     MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1026     MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
1027     Observer.changedInstr(MI);
1028     MI.eraseFromParent();
1029     return Legalized;
1030   }
1031   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1032   case TargetOpcode::G_INSERT_VECTOR_ELT: {
1033     if (TypeIdx != 2)
1034       return UnableToLegalize;
1035 
1036     int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1037     Observer.changingInstr(MI);
1038     narrowScalarSrc(MI, NarrowTy, OpIdx);
1039     Observer.changedInstr(MI);
1040     return Legalized;
1041   }
1042   case TargetOpcode::G_ICMP: {
1043     uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
1044     if (NarrowSize * 2 != SrcSize)
1045       return UnableToLegalize;
1046 
1047     Observer.changingInstr(MI);
1048     Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
1049     Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
1050     MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2));
1051 
1052     Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
1053     Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
1054     MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3));
1055 
1056     CmpInst::Predicate Pred =
1057         static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1058     LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
1059 
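
    // Compare in halves: equality predicates OR together the XORs of the two
    // halves and compare the result against zero. Other predicates compare
    // the high halves, falling back to an unsigned comparison of the low
    // halves when the high halves are equal.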
1060     if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
1061       MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
1062       MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
1063       MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
1064       MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1065       MIRBuilder.buildICmp(Pred, MI.getOperand(0), Or, Zero);
1066     } else {
1067       MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
1068       MachineInstrBuilder CmpHEQ =
1069           MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
1070       MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
1071           ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
1072       MIRBuilder.buildSelect(MI.getOperand(0), CmpHEQ, CmpLU, CmpH);
1073     }
1074     Observer.changedInstr(MI);
1075     MI.eraseFromParent();
1076     return Legalized;
1077   }
1078   case TargetOpcode::G_SEXT_INREG: {
1079     if (TypeIdx != 0)
1080       return UnableToLegalize;
1081 
1082     if (!MI.getOperand(2).isImm())
1083       return UnableToLegalize;
1084     int64_t SizeInBits = MI.getOperand(2).getImm();
1085 
    // So long as the new type has more bits than the bits we're extending, we
    // don't need to break it apart.
1088     if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1089       Observer.changingInstr(MI);
1090       // We don't lose any non-extension bits by truncating the src and
1091       // sign-extending the dst.
1092       MachineOperand &MO1 = MI.getOperand(1);
1093       auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1094       MO1.setReg(TruncMIB.getReg(0));
1095 
1096       MachineOperand &MO2 = MI.getOperand(0);
1097       Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1098       MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1099       MIRBuilder.buildSExt(MO2, DstExt);
1100       MO2.setReg(DstExt);
1101       Observer.changedInstr(MI);
1102       return Legalized;
1103     }
1104 
1105     // Break it apart. Components below the extension point are unmodified. The
1106     // component containing the extension point becomes a narrower SEXT_INREG.
1107     // Components above it are ashr'd from the component containing the
1108     // extension point.
1109     if (SizeOp0 % NarrowSize != 0)
1110       return UnableToLegalize;
1111     int NumParts = SizeOp0 / NarrowSize;
1112 
1113     // List the registers where the destination will be scattered.
1114     SmallVector<Register, 2> DstRegs;
1115     // List the registers where the source will be split.
1116     SmallVector<Register, 2> SrcRegs;
1117 
1118     // Create all the temporary registers.
1119     for (int i = 0; i < NumParts; ++i) {
1120       Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1121 
1122       SrcRegs.push_back(SrcReg);
1123     }
1124 
1125     // Explode the big arguments into smaller chunks.
1126     MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
1127 
1128     Register AshrCstReg =
1129         MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1130             .getReg(0);
1131     Register FullExtensionReg = 0;
1132     Register PartialExtensionReg = 0;
1133 
1134     // Do the operation on each small part.
1135     for (int i = 0; i < NumParts; ++i) {
1136       if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
1137         DstRegs.push_back(SrcRegs[i]);
1138       else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
1139         assert(PartialExtensionReg &&
1140                "Expected to visit partial extension before full");
1141         if (FullExtensionReg) {
1142           DstRegs.push_back(FullExtensionReg);
1143           continue;
1144         }
1145         DstRegs.push_back(
1146             MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1147                 .getReg(0));
1148         FullExtensionReg = DstRegs.back();
1149       } else {
1150         DstRegs.push_back(
1151             MIRBuilder
1152                 .buildInstr(
1153                     TargetOpcode::G_SEXT_INREG, {NarrowTy},
1154                     {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1155                 .getReg(0));
1156         PartialExtensionReg = DstRegs.back();
1157       }
1158     }
1159 
1160     // Gather the destination registers into the final destination.
1161     Register DstReg = MI.getOperand(0).getReg();
1162     MIRBuilder.buildMerge(DstReg, DstRegs);
1163     MI.eraseFromParent();
1164     return Legalized;
1165   }
1166   case TargetOpcode::G_BSWAP:
1167   case TargetOpcode::G_BITREVERSE: {
1168     if (SizeOp0 % NarrowSize != 0)
1169       return UnableToLegalize;
1170 
1171     Observer.changingInstr(MI);
1172     SmallVector<Register, 2> SrcRegs, DstRegs;
1173     unsigned NumParts = SizeOp0 / NarrowSize;
1174     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
1175 
1176     for (unsigned i = 0; i < NumParts; ++i) {
1177       auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1178                                            {SrcRegs[NumParts - 1 - i]});
1179       DstRegs.push_back(DstPart.getReg(0));
1180     }
1181 
1182     MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
1183 
1184     Observer.changedInstr(MI);
1185     MI.eraseFromParent();
1186     return Legalized;
1187   }
1188   }
1189 }
1190 
1191 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
1192                                      unsigned OpIdx, unsigned ExtOpcode) {
1193   MachineOperand &MO = MI.getOperand(OpIdx);
1194   auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
1195   MO.setReg(ExtB.getReg(0));
1196 }
1197 
1198 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
1199                                       unsigned OpIdx) {
1200   MachineOperand &MO = MI.getOperand(OpIdx);
1201   auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
1202   MO.setReg(ExtB.getReg(0));
1203 }
1204 
1205 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
1206                                      unsigned OpIdx, unsigned TruncOpcode) {
1207   MachineOperand &MO = MI.getOperand(OpIdx);
1208   Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1209   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1210   MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
1211   MO.setReg(DstExt);
1212 }
1213 
1214 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
1215                                       unsigned OpIdx, unsigned ExtOpcode) {
1216   MachineOperand &MO = MI.getOperand(OpIdx);
1217   Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
1218   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1219   MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
1220   MO.setReg(DstTrunc);
1221 }
1222 
1223 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
1224                                             unsigned OpIdx) {
1225   MachineOperand &MO = MI.getOperand(OpIdx);
1226   Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1227   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1228   MIRBuilder.buildExtract(MO, DstExt, 0);
1229   MO.setReg(DstExt);
1230 }
1231 
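/// Widen the vector operand at \p OpIdx to \p MoreTy. When the new element
/// count is a multiple of the old one (e.g. <2 x s32> to <6 x s32>), pad with
/// undef vectors and concatenate; otherwise (e.g. <3 x s16> to <4 x s16>)
/// insert the old value into an undef value of the wider type.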
1232 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
1233                                             unsigned OpIdx) {
1234   MachineOperand &MO = MI.getOperand(OpIdx);
1235 
1236   LLT OldTy = MRI.getType(MO.getReg());
1237   unsigned OldElts = OldTy.getNumElements();
1238   unsigned NewElts = MoreTy.getNumElements();
1239 
1240   unsigned NumParts = NewElts / OldElts;
1241 
1242   // Use concat_vectors if the result is a multiple of the number of elements.
1243   if (NumParts * OldElts == NewElts) {
1244     SmallVector<Register, 8> Parts;
1245     Parts.push_back(MO.getReg());
1246 
1247     Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
1248     for (unsigned I = 1; I != NumParts; ++I)
1249       Parts.push_back(ImpDef);
1250 
1251     auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
1252     MO.setReg(Concat.getReg(0));
1253     return;
1254   }
1255 
1256   Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
1257   Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
1258   MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
1259   MO.setReg(MoreReg);
1260 }
1261 
1262 LegalizerHelper::LegalizeResult
1263 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
1264                                         LLT WideTy) {
1265   if (TypeIdx != 1)
1266     return UnableToLegalize;
1267 
1268   Register DstReg = MI.getOperand(0).getReg();
1269   LLT DstTy = MRI.getType(DstReg);
1270   if (DstTy.isVector())
1271     return UnableToLegalize;
1272 
1273   Register Src1 = MI.getOperand(1).getReg();
1274   LLT SrcTy = MRI.getType(Src1);
1275   const int DstSize = DstTy.getSizeInBits();
1276   const int SrcSize = SrcTy.getSizeInBits();
1277   const int WideSize = WideTy.getSizeInBits();
1278   const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1279 
1280   unsigned NumOps = MI.getNumOperands();
1281   unsigned NumSrc = MI.getNumOperands() - 1;
1282   unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1283 
1284   if (WideSize >= DstSize) {
1285     // Directly pack the bits in the target type.
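    // For example, widening a merge of three s8 sources into an s24 result to
    // s32 zero-extends each source to s32, shifts it into place, ORs the
    // pieces together, and truncates the final s32 value back to s24.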
1286     Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
1287 
1288     for (unsigned I = 2; I != NumOps; ++I) {
1289       const unsigned Offset = (I - 1) * PartSize;
1290 
1291       Register SrcReg = MI.getOperand(I).getReg();
1292       assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
1293 
1294       auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
1295 
1296       Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
1297         MRI.createGenericVirtualRegister(WideTy);
1298 
1299       auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
1300       auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
1301       MIRBuilder.buildOr(NextResult, ResultReg, Shl);
1302       ResultReg = NextResult;
1303     }
1304 
1305     if (WideSize > DstSize)
1306       MIRBuilder.buildTrunc(DstReg, ResultReg);
1307     else if (DstTy.isPointer())
1308       MIRBuilder.buildIntToPtr(DstReg, ResultReg);
1309 
1310     MI.eraseFromParent();
1311     return Legalized;
1312   }
1313 
1314   // Unmerge the original values to the GCD type, and recombine to the next
1315   // multiple greater than the original type.
1316   //
1317   // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1318   // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1319   // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1320   // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1321   // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1322   // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1323   // %12:_(s12) = G_MERGE_VALUES %10, %11
1324   //
1325   // Padding with undef if necessary:
1326   //
1327   // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1328   // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1329   // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1330   // %7:_(s2) = G_IMPLICIT_DEF
1331   // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1332   // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1333   // %10:_(s12) = G_MERGE_VALUES %8, %9
1334 
1335   const int GCD = greatestCommonDivisor(SrcSize, WideSize);
1336   LLT GCDTy = LLT::scalar(GCD);
1337 
1338   SmallVector<Register, 8> Parts;
1339   SmallVector<Register, 8> NewMergeRegs;
1340   SmallVector<Register, 8> Unmerges;
1341   LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
1342 
1343   // Decompose the original operands if they don't evenly divide.
1344   for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
1345     Register SrcReg = MI.getOperand(I).getReg();
1346     if (GCD == SrcSize) {
1347       Unmerges.push_back(SrcReg);
1348     } else {
1349       auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
1350       for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1351         Unmerges.push_back(Unmerge.getReg(J));
1352     }
1353   }
1354 
1355   // Pad with undef to the next size that is a multiple of the requested size.
1356   if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
1357     Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
1358     for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
1359       Unmerges.push_back(UndefReg);
1360   }
1361 
1362   const int PartsPerGCD = WideSize / GCD;
1363 
1364   // Build merges of each piece.
1365   ArrayRef<Register> Slicer(Unmerges);
1366   for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1367     auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
1368     NewMergeRegs.push_back(Merge.getReg(0));
1369   }
1370 
1371   // A truncate may be necessary if the requested type doesn't evenly divide the
1372   // original result type.
1373   if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1374     MIRBuilder.buildMerge(DstReg, NewMergeRegs);
1375   } else {
1376     auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
1377     MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
1378   }
1379 
1380   MI.eraseFromParent();
1381   return Legalized;
1382 }
1383 
1384 LegalizerHelper::LegalizeResult
1385 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
1386                                           LLT WideTy) {
1387   if (TypeIdx != 0)
1388     return UnableToLegalize;
1389 
1390   unsigned NumDst = MI.getNumOperands() - 1;
1391   Register SrcReg = MI.getOperand(NumDst).getReg();
1392   LLT SrcTy = MRI.getType(SrcReg);
1393   if (!SrcTy.isScalar())
1394     return UnableToLegalize;
1395 
1396   Register Dst0Reg = MI.getOperand(0).getReg();
1397   LLT DstTy = MRI.getType(Dst0Reg);
1398   if (!DstTy.isScalar())
1399     return UnableToLegalize;
1400 
1401   unsigned NewSrcSize = NumDst * WideTy.getSizeInBits();
1402   LLT NewSrcTy = LLT::scalar(NewSrcSize);
1403   unsigned SizeDiff = WideTy.getSizeInBits() - DstTy.getSizeInBits();
1404 
1405   auto WideSrc = MIRBuilder.buildZExt(NewSrcTy, SrcReg);
1406 
1407   for (unsigned I = 1; I != NumDst; ++I) {
1408     auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, SizeDiff * I);
1409     auto Shl = MIRBuilder.buildShl(NewSrcTy, WideSrc, ShiftAmt);
1410     WideSrc = MIRBuilder.buildOr(NewSrcTy, WideSrc, Shl);
1411   }
1412 
1413   Observer.changingInstr(MI);
1414 
1415   MI.getOperand(NumDst).setReg(WideSrc.getReg(0));
1416   for (unsigned I = 0; I != NumDst; ++I)
1417     widenScalarDst(MI, WideTy, I);
1418 
1419   Observer.changedInstr(MI);
1420 
1421   return Legalized;
1422 }
1423 
1424 LegalizerHelper::LegalizeResult
1425 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
1426                                     LLT WideTy) {
1427   Register DstReg = MI.getOperand(0).getReg();
1428   Register SrcReg = MI.getOperand(1).getReg();
1429   LLT SrcTy = MRI.getType(SrcReg);
1430 
1431   LLT DstTy = MRI.getType(DstReg);
1432   unsigned Offset = MI.getOperand(2).getImm();
1433 
1434   if (TypeIdx == 0) {
1435     if (SrcTy.isVector() || DstTy.isVector())
1436       return UnableToLegalize;
1437 
1438     SrcOp Src(SrcReg);
1439     if (SrcTy.isPointer()) {
1440       // Extracts from pointers can be handled only if they are really just
1441       // simple integers.
1442       const DataLayout &DL = MIRBuilder.getDataLayout();
1443       if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
1444         return UnableToLegalize;
1445 
1446       LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
1447       Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
1448       SrcTy = SrcAsIntTy;
1449     }
1450 
1451     if (DstTy.isPointer())
1452       return UnableToLegalize;
1453 
1454     if (Offset == 0) {
1455       // Avoid a shift in the degenerate case.
1456       MIRBuilder.buildTrunc(DstReg,
1457                             MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
1458       MI.eraseFromParent();
1459       return Legalized;
1460     }
1461 
1462     // Do a shift in the source type.
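    // E.g. a G_EXTRACT of s16 at offset 32 from an s64 source becomes an s64
    // lshr by 32 followed by a truncate to s16.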
1463     LLT ShiftTy = SrcTy;
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      Src = MIRBuilder.buildAnyExt(WideTy, Src);
      ShiftTy = WideTy;
    }
1469 
1470     auto LShr = MIRBuilder.buildLShr(
1471       ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
1472     MIRBuilder.buildTrunc(DstReg, LShr);
1473     MI.eraseFromParent();
1474     return Legalized;
1475   }
1476 
1477   if (SrcTy.isScalar()) {
1478     Observer.changingInstr(MI);
1479     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1480     Observer.changedInstr(MI);
1481     return Legalized;
1482   }
1483 
1484   if (!SrcTy.isVector())
1485     return UnableToLegalize;
1486 
1487   if (DstTy != SrcTy.getElementType())
1488     return UnableToLegalize;
1489 
1490   if (Offset % SrcTy.getScalarSizeInBits() != 0)
1491     return UnableToLegalize;
1492 
1493   Observer.changingInstr(MI);
1494   widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1495 
1496   MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
1497                           Offset);
1498   widenScalarDst(MI, WideTy.getScalarType(), 0);
1499   Observer.changedInstr(MI);
1500   return Legalized;
1501 }
1502 
1503 LegalizerHelper::LegalizeResult
1504 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
1505                                    LLT WideTy) {
1506   if (TypeIdx != 0)
1507     return UnableToLegalize;
1508   Observer.changingInstr(MI);
1509   widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1510   widenScalarDst(MI, WideTy);
1511   Observer.changedInstr(MI);
1512   return Legalized;
1513 }
1514 
1515 LegalizerHelper::LegalizeResult
1516 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
1517   MIRBuilder.setInstr(MI);
1518 
1519   switch (MI.getOpcode()) {
1520   default:
1521     return UnableToLegalize;
1522   case TargetOpcode::G_EXTRACT:
1523     return widenScalarExtract(MI, TypeIdx, WideTy);
1524   case TargetOpcode::G_INSERT:
1525     return widenScalarInsert(MI, TypeIdx, WideTy);
1526   case TargetOpcode::G_MERGE_VALUES:
1527     return widenScalarMergeValues(MI, TypeIdx, WideTy);
1528   case TargetOpcode::G_UNMERGE_VALUES:
1529     return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
1530   case TargetOpcode::G_UADDO:
1531   case TargetOpcode::G_USUBO: {
1532     if (TypeIdx == 1)
1533       return UnableToLegalize; // TODO
1534     auto LHSZext = MIRBuilder.buildZExt(WideTy, MI.getOperand(2));
1535     auto RHSZext = MIRBuilder.buildZExt(WideTy, MI.getOperand(3));
1536     unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
1537                           ? TargetOpcode::G_ADD
1538                           : TargetOpcode::G_SUB;
1539     // Do the arithmetic in the larger type.
1540     auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
1541     LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
1542     APInt Mask =
1543         APInt::getLowBitsSet(WideTy.getSizeInBits(), OrigTy.getSizeInBits());
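    // E.g. for s8 results widened to s32, Mask is 0xff and the overflow flag
    // is set whenever the wide result has any bits set above bit 7.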
1544     auto AndOp = MIRBuilder.buildAnd(
1545         WideTy, NewOp, MIRBuilder.buildConstant(WideTy, Mask));
1546     // There is no overflow if the AndOp is the same as NewOp.
1547     MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, AndOp);
1548     // Now trunc the NewOp to the original result.
1549     MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
1550     MI.eraseFromParent();
1551     return Legalized;
1552   }
1553   case TargetOpcode::G_CTTZ:
1554   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1555   case TargetOpcode::G_CTLZ:
1556   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1557   case TargetOpcode::G_CTPOP: {
1558     if (TypeIdx == 0) {
1559       Observer.changingInstr(MI);
1560       widenScalarDst(MI, WideTy, 0);
1561       Observer.changedInstr(MI);
1562       return Legalized;
1563     }
1564 
1565     Register SrcReg = MI.getOperand(1).getReg();
1566 
1567     // First ZEXT the input.
1568     auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
1569     LLT CurTy = MRI.getType(SrcReg);
1570     if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
1571       // The count is the same in the larger type except if the original
1572       // value was zero.  This can be handled by setting the bit just off
1573       // the top of the original type.
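      // For example, when widening s8 to s32, OR in a constant with only bit 8
      // set so that a zero input produces a count of 8 rather than 32.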
1574       auto TopBit =
1575           APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
1576       MIBSrc = MIRBuilder.buildOr(
1577         WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
1578     }
1579 
1580     // Perform the operation at the larger size.
1581     auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
    // This is already the correct result for CTPOP and the CTTZ variants.
1583     if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
1584         MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
      // The correct result is NewOp - (difference between WideTy and CurTy).
1586       unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
1587       MIBNewOp = MIRBuilder.buildSub(
1588           WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
1589     }
1590 
1591     MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
1592     MI.eraseFromParent();
1593     return Legalized;
1594   }
1595   case TargetOpcode::G_BSWAP: {
1596     Observer.changingInstr(MI);
1597     Register DstReg = MI.getOperand(0).getReg();
1598 
1599     Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
1600     Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1601     Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
1602     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1603 
1604     MI.getOperand(0).setReg(DstExt);
1605 
1606     MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1607 
1608     LLT Ty = MRI.getType(DstReg);
1609     unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
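    // E.g. for an s16 bswap widened to s32, the swapped bytes end up in the
    // high 16 bits of the wide result, so shift them back down by DiffBits
    // before truncating.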
1610     MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
1611     MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
1612 
1613     MIRBuilder.buildTrunc(DstReg, ShrReg);
1614     Observer.changedInstr(MI);
1615     return Legalized;
1616   }
1617   case TargetOpcode::G_BITREVERSE: {
1618     Observer.changingInstr(MI);
1619 
1620     Register DstReg = MI.getOperand(0).getReg();
1621     LLT Ty = MRI.getType(DstReg);
1622     unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
1623 
1624     Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1625     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1626     MI.getOperand(0).setReg(DstExt);
1627     MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1628 
1629     auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
1630     auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
1631     MIRBuilder.buildTrunc(DstReg, Shift);
1632     Observer.changedInstr(MI);
1633     return Legalized;
1634   }
1635   case TargetOpcode::G_ADD:
1636   case TargetOpcode::G_AND:
1637   case TargetOpcode::G_MUL:
1638   case TargetOpcode::G_OR:
1639   case TargetOpcode::G_XOR:
1640   case TargetOpcode::G_SUB:
    // Perform operation at larger width (any extension is fine here, high bits
1642     // don't affect the result) and then truncate the result back to the
1643     // original type.
1644     Observer.changingInstr(MI);
1645     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1646     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1647     widenScalarDst(MI, WideTy);
1648     Observer.changedInstr(MI);
1649     return Legalized;
1650 
1651   case TargetOpcode::G_SHL:
1652     Observer.changingInstr(MI);
1653 
1654     if (TypeIdx == 0) {
1655       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1656       widenScalarDst(MI, WideTy);
1657     } else {
1658       assert(TypeIdx == 1);
1659       // The "number of bits to shift" operand must preserve its value as an
1660       // unsigned integer:
1661       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1662     }
1663 
1664     Observer.changedInstr(MI);
1665     return Legalized;
1666 
1667   case TargetOpcode::G_SDIV:
1668   case TargetOpcode::G_SREM:
1669   case TargetOpcode::G_SMIN:
1670   case TargetOpcode::G_SMAX:
1671     Observer.changingInstr(MI);
1672     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
1673     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1674     widenScalarDst(MI, WideTy);
1675     Observer.changedInstr(MI);
1676     return Legalized;
1677 
1678   case TargetOpcode::G_ASHR:
1679   case TargetOpcode::G_LSHR:
1680     Observer.changingInstr(MI);
1681 
1682     if (TypeIdx == 0) {
1683       unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
1684         TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
1685 
1686       widenScalarSrc(MI, WideTy, 1, CvtOp);
1687       widenScalarDst(MI, WideTy);
1688     } else {
1689       assert(TypeIdx == 1);
1690       // The "number of bits to shift" operand must preserve its value as an
1691       // unsigned integer:
1692       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1693     }
1694 
1695     Observer.changedInstr(MI);
1696     return Legalized;
1697   case TargetOpcode::G_UDIV:
1698   case TargetOpcode::G_UREM:
1699   case TargetOpcode::G_UMIN:
1700   case TargetOpcode::G_UMAX:
1701     Observer.changingInstr(MI);
1702     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1703     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1704     widenScalarDst(MI, WideTy);
1705     Observer.changedInstr(MI);
1706     return Legalized;
1707 
1708   case TargetOpcode::G_SELECT:
1709     Observer.changingInstr(MI);
1710     if (TypeIdx == 0) {
1711       // Perform operation at larger width (any extension is fine here, high
1712       // bits don't affect the result) and then truncate the result back to the
1713       // original type.
1714       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1715       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
1716       widenScalarDst(MI, WideTy);
1717     } else {
1718       bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
1719       // Explicit extension is required here since high bits affect the result.
1720       widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
1721     }
1722     Observer.changedInstr(MI);
1723     return Legalized;
1724 
1725   case TargetOpcode::G_FPTOSI:
1726   case TargetOpcode::G_FPTOUI:
1727     Observer.changingInstr(MI);
1728 
1729     if (TypeIdx == 0)
1730       widenScalarDst(MI, WideTy);
1731     else
1732       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
1733 
1734     Observer.changedInstr(MI);
1735     return Legalized;
1736   case TargetOpcode::G_SITOFP:
1737     if (TypeIdx != 1)
1738       return UnableToLegalize;
1739     Observer.changingInstr(MI);
1740     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
1741     Observer.changedInstr(MI);
1742     return Legalized;
1743 
1744   case TargetOpcode::G_UITOFP:
1745     if (TypeIdx != 1)
1746       return UnableToLegalize;
1747     Observer.changingInstr(MI);
1748     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1749     Observer.changedInstr(MI);
1750     return Legalized;
1751 
1752   case TargetOpcode::G_LOAD:
1753   case TargetOpcode::G_SEXTLOAD:
1754   case TargetOpcode::G_ZEXTLOAD:
1755     Observer.changingInstr(MI);
1756     widenScalarDst(MI, WideTy);
1757     Observer.changedInstr(MI);
1758     return Legalized;
1759 
1760   case TargetOpcode::G_STORE: {
1761     if (TypeIdx != 0)
1762       return UnableToLegalize;
1763 
1764     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1765     if (!isPowerOf2_32(Ty.getSizeInBits()))
1766       return UnableToLegalize;
1767 
1768     Observer.changingInstr(MI);
1769 
1770     unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
1771       TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
1772     widenScalarSrc(MI, WideTy, 0, ExtType);
1773 
1774     Observer.changedInstr(MI);
1775     return Legalized;
1776   }
1777   case TargetOpcode::G_CONSTANT: {
1778     MachineOperand &SrcMO = MI.getOperand(1);
1779     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
1780     unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
1781         MRI.getType(MI.getOperand(0).getReg()));
1782     assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
1783             ExtOpc == TargetOpcode::G_ANYEXT) &&
1784            "Illegal Extend");
1785     const APInt &SrcVal = SrcMO.getCImm()->getValue();
1786     const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
1787                            ? SrcVal.sext(WideTy.getSizeInBits())
1788                            : SrcVal.zext(WideTy.getSizeInBits());
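    // E.g. an s1 constant of -1 widened to s32 becomes 0xffffffff with G_SEXT
    // and 1 with G_ZEXT.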
1789     Observer.changingInstr(MI);
1790     SrcMO.setCImm(ConstantInt::get(Ctx, Val));
1791 
1792     widenScalarDst(MI, WideTy);
1793     Observer.changedInstr(MI);
1794     return Legalized;
1795   }
1796   case TargetOpcode::G_FCONSTANT: {
1797     MachineOperand &SrcMO = MI.getOperand(1);
1798     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
1799     APFloat Val = SrcMO.getFPImm()->getValueAPF();
1800     bool LosesInfo;
1801     switch (WideTy.getSizeInBits()) {
1802     case 32:
1803       Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
1804                   &LosesInfo);
1805       break;
1806     case 64:
1807       Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
1808                   &LosesInfo);
1809       break;
1810     default:
1811       return UnableToLegalize;
1812     }
1813 
1814     assert(!LosesInfo && "extend should always be lossless");
1815 
1816     Observer.changingInstr(MI);
1817     SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
1818 
1819     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
1820     Observer.changedInstr(MI);
1821     return Legalized;
1822   }
1823   case TargetOpcode::G_IMPLICIT_DEF: {
1824     Observer.changingInstr(MI);
1825     widenScalarDst(MI, WideTy);
1826     Observer.changedInstr(MI);
1827     return Legalized;
1828   }
1829   case TargetOpcode::G_BRCOND:
1830     Observer.changingInstr(MI);
1831     widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
1832     Observer.changedInstr(MI);
1833     return Legalized;
1834 
1835   case TargetOpcode::G_FCMP:
1836     Observer.changingInstr(MI);
1837     if (TypeIdx == 0)
1838       widenScalarDst(MI, WideTy);
1839     else {
1840       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
1841       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
1842     }
1843     Observer.changedInstr(MI);
1844     return Legalized;
1845 
1846   case TargetOpcode::G_ICMP:
1847     Observer.changingInstr(MI);
1848     if (TypeIdx == 0)
1849       widenScalarDst(MI, WideTy);
1850     else {
1851       unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
1852                                MI.getOperand(1).getPredicate()))
1853                                ? TargetOpcode::G_SEXT
1854                                : TargetOpcode::G_ZEXT;
1855       widenScalarSrc(MI, WideTy, 2, ExtOpcode);
1856       widenScalarSrc(MI, WideTy, 3, ExtOpcode);
1857     }
1858     Observer.changedInstr(MI);
1859     return Legalized;
1860 
1861   case TargetOpcode::G_PTR_ADD:
1862     assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
1863     Observer.changingInstr(MI);
1864     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1865     Observer.changedInstr(MI);
1866     return Legalized;
1867 
1868   case TargetOpcode::G_PHI: {
1869     assert(TypeIdx == 0 && "Expecting only Idx 0");
1870 
1871     Observer.changingInstr(MI);
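    // Any-extend each incoming value in its predecessor block, just before the
    // terminator; the truncate back to the original type is emitted after the
    // PHI nodes of this block.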
1872     for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
1873       MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
1874       MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
1875       widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
1876     }
1877 
1878     MachineBasicBlock &MBB = *MI.getParent();
1879     MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
1880     widenScalarDst(MI, WideTy);
1881     Observer.changedInstr(MI);
1882     return Legalized;
1883   }
1884   case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
1885     if (TypeIdx == 0) {
1886       Register VecReg = MI.getOperand(1).getReg();
1887       LLT VecTy = MRI.getType(VecReg);
1888       Observer.changingInstr(MI);
1889 
1890       widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
1891                                      WideTy.getSizeInBits()),
1892                      1, TargetOpcode::G_SEXT);
1893 
1894       widenScalarDst(MI, WideTy, 0);
1895       Observer.changedInstr(MI);
1896       return Legalized;
1897     }
1898 
1899     if (TypeIdx != 2)
1900       return UnableToLegalize;
1901     Observer.changingInstr(MI);
1902     // TODO: Probably should be zext
1903     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1904     Observer.changedInstr(MI);
1905     return Legalized;
1906   }
1907   case TargetOpcode::G_INSERT_VECTOR_ELT: {
1908     if (TypeIdx == 1) {
1909       Observer.changingInstr(MI);
1910 
1911       Register VecReg = MI.getOperand(1).getReg();
1912       LLT VecTy = MRI.getType(VecReg);
1913       LLT WideVecTy = LLT::vector(VecTy.getNumElements(), WideTy);
1914 
1915       widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
1916       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1917       widenScalarDst(MI, WideVecTy, 0);
1918       Observer.changedInstr(MI);
1919       return Legalized;
1920     }
1921 
1922     if (TypeIdx == 2) {
1923       Observer.changingInstr(MI);
1924       // TODO: Probably should be zext
1925       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
1926       Observer.changedInstr(MI);
1927     }
1928 
1929     return Legalized;
1930   }
1931   case TargetOpcode::G_FADD:
1932   case TargetOpcode::G_FMUL:
1933   case TargetOpcode::G_FSUB:
1934   case TargetOpcode::G_FMA:
1935   case TargetOpcode::G_FMAD:
1936   case TargetOpcode::G_FNEG:
1937   case TargetOpcode::G_FABS:
1938   case TargetOpcode::G_FCANONICALIZE:
1939   case TargetOpcode::G_FMINNUM:
1940   case TargetOpcode::G_FMAXNUM:
1941   case TargetOpcode::G_FMINNUM_IEEE:
1942   case TargetOpcode::G_FMAXNUM_IEEE:
1943   case TargetOpcode::G_FMINIMUM:
1944   case TargetOpcode::G_FMAXIMUM:
1945   case TargetOpcode::G_FDIV:
1946   case TargetOpcode::G_FREM:
1947   case TargetOpcode::G_FCEIL:
1948   case TargetOpcode::G_FFLOOR:
1949   case TargetOpcode::G_FCOS:
1950   case TargetOpcode::G_FSIN:
1951   case TargetOpcode::G_FLOG10:
1952   case TargetOpcode::G_FLOG:
1953   case TargetOpcode::G_FLOG2:
1954   case TargetOpcode::G_FRINT:
1955   case TargetOpcode::G_FNEARBYINT:
1956   case TargetOpcode::G_FSQRT:
1957   case TargetOpcode::G_FEXP:
1958   case TargetOpcode::G_FEXP2:
1959   case TargetOpcode::G_FPOW:
1960   case TargetOpcode::G_INTRINSIC_TRUNC:
1961   case TargetOpcode::G_INTRINSIC_ROUND:
1962     assert(TypeIdx == 0);
1963     Observer.changingInstr(MI);
1964 
1965     for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
1966       widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
1967 
1968     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
1969     Observer.changedInstr(MI);
1970     return Legalized;
1971   case TargetOpcode::G_INTTOPTR:
1972     if (TypeIdx != 1)
1973       return UnableToLegalize;
1974 
1975     Observer.changingInstr(MI);
1976     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1977     Observer.changedInstr(MI);
1978     return Legalized;
1979   case TargetOpcode::G_PTRTOINT:
1980     if (TypeIdx != 0)
1981       return UnableToLegalize;
1982 
1983     Observer.changingInstr(MI);
1984     widenScalarDst(MI, WideTy, 0);
1985     Observer.changedInstr(MI);
1986     return Legalized;
1987   case TargetOpcode::G_BUILD_VECTOR: {
1988     Observer.changingInstr(MI);
1989 
1990     const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
1991     for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
1992       widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
1993 
1994     // Avoid changing the result vector type if the source element type was
1995     // requested.
1996     if (TypeIdx == 1) {
1997       auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
1998       MI.setDesc(TII.get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
1999     } else {
2000       widenScalarDst(MI, WideTy, 0);
2001     }
2002 
2003     Observer.changedInstr(MI);
2004     return Legalized;
2005   }
2006   case TargetOpcode::G_SEXT_INREG:
2007     if (TypeIdx != 0)
2008       return UnableToLegalize;
2009 
2010     Observer.changingInstr(MI);
2011     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2012     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
2013     Observer.changedInstr(MI);
2014     return Legalized;
2015   }
2016 }
2017 
2018 static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
2019                              MachineIRBuilder &B, Register Src, LLT Ty) {
2020   auto Unmerge = B.buildUnmerge(Ty, Src);
2021   for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
2022     Pieces.push_back(Unmerge.getReg(I));
2023 }
2024 
2025 LegalizerHelper::LegalizeResult
2026 LegalizerHelper::lowerBitcast(MachineInstr &MI) {
2027   Register Dst = MI.getOperand(0).getReg();
2028   Register Src = MI.getOperand(1).getReg();
2029   LLT DstTy = MRI.getType(Dst);
2030   LLT SrcTy = MRI.getType(Src);
2031 
2032   if (SrcTy.isVector() && !DstTy.isVector()) {
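    // E.g. s64 = G_BITCAST <2 x s32>: unmerge the source into its s32
    // elements and merge them back together as a single s64.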
2033     SmallVector<Register, 8> SrcRegs;
2034     getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcTy.getElementType());
2035     MIRBuilder.buildMerge(Dst, SrcRegs);
2036     MI.eraseFromParent();
2037     return Legalized;
2038   }
2039 
2040   if (DstTy.isVector() && !SrcTy.isVector()) {
2041     SmallVector<Register, 8> SrcRegs;
2042     getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
2043     MIRBuilder.buildMerge(Dst, SrcRegs);
2044     MI.eraseFromParent();
2045     return Legalized;
2046   }
2047 
2048   return UnableToLegalize;
2049 }
2050 
2051 LegalizerHelper::LegalizeResult
2052 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
2053   using namespace TargetOpcode;
2054   MIRBuilder.setInstr(MI);
2055 
2056   switch(MI.getOpcode()) {
2057   default:
2058     return UnableToLegalize;
2059   case TargetOpcode::G_BITCAST:
2060     return lowerBitcast(MI);
2061   case TargetOpcode::G_SREM:
2062   case TargetOpcode::G_UREM: {
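    // Compute the remainder in terms of division: x % y == x - (x / y) * y.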
2063     Register QuotReg = MRI.createGenericVirtualRegister(Ty);
2064     MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {QuotReg},
2065                           {MI.getOperand(1), MI.getOperand(2)});
2066 
2067     Register ProdReg = MRI.createGenericVirtualRegister(Ty);
2068     MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2));
2069     MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), ProdReg);
2070     MI.eraseFromParent();
2071     return Legalized;
2072   }
2073   case TargetOpcode::G_SADDO:
2074   case TargetOpcode::G_SSUBO:
2075     return lowerSADDO_SSUBO(MI);
2076   case TargetOpcode::G_SMULO:
2077   case TargetOpcode::G_UMULO: {
2078     // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
2079     // result.
2080     Register Res = MI.getOperand(0).getReg();
2081     Register Overflow = MI.getOperand(1).getReg();
2082     Register LHS = MI.getOperand(2).getReg();
2083     Register RHS = MI.getOperand(3).getReg();
2084 
2085     MIRBuilder.buildMul(Res, LHS, RHS);
2086 
2087     unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
2088                           ? TargetOpcode::G_SMULH
2089                           : TargetOpcode::G_UMULH;
2090 
2091     Register HiPart = MRI.createGenericVirtualRegister(Ty);
2092     MIRBuilder.buildInstr(Opcode)
2093       .addDef(HiPart)
2094       .addUse(LHS)
2095       .addUse(RHS);
2096 
2097     Register Zero = MRI.createGenericVirtualRegister(Ty);
2098     MIRBuilder.buildConstant(Zero, 0);
2099 
2100     // For *signed* multiply, overflow is detected by checking:
2101     // (hi != (lo >> bitwidth-1))
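    // i.e. the product fits only if the high half is the sign extension of the
    // low half.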
2102     if (Opcode == TargetOpcode::G_SMULH) {
2103       Register Shifted = MRI.createGenericVirtualRegister(Ty);
2104       Register ShiftAmt = MRI.createGenericVirtualRegister(Ty);
2105       MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
2106       MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
2107         .addDef(Shifted)
2108         .addUse(Res)
2109         .addUse(ShiftAmt);
2110       MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
2111     } else {
2112       MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
2113     }
2114     MI.eraseFromParent();
2115     return Legalized;
2116   }
2117   case TargetOpcode::G_FNEG: {
2118     // TODO: Handle vector types once we are able to
2119     // represent them.
2120     if (Ty.isVector())
2121       return UnableToLegalize;
2122     Register Res = MI.getOperand(0).getReg();
2123     Type *ZeroTy;
2124     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
2125     switch (Ty.getSizeInBits()) {
2126     case 16:
2127       ZeroTy = Type::getHalfTy(Ctx);
2128       break;
2129     case 32:
2130       ZeroTy = Type::getFloatTy(Ctx);
2131       break;
2132     case 64:
2133       ZeroTy = Type::getDoubleTy(Ctx);
2134       break;
2135     case 128:
2136       ZeroTy = Type::getFP128Ty(Ctx);
2137       break;
2138     default:
2139       llvm_unreachable("unexpected floating-point type");
2140     }
2141     ConstantFP &ZeroForNegation =
2142         *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
2143     auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
2144     Register SubByReg = MI.getOperand(1).getReg();
2145     Register ZeroReg = Zero.getReg(0);
2146     MIRBuilder.buildFSub(Res, ZeroReg, SubByReg, MI.getFlags());
2147     MI.eraseFromParent();
2148     return Legalized;
2149   }
2150   case TargetOpcode::G_FSUB: {
2151     // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
2152     // First, check if G_FNEG is marked as Lower. If so, we may
2153     // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
2154     if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
2155       return UnableToLegalize;
2156     Register Res = MI.getOperand(0).getReg();
2157     Register LHS = MI.getOperand(1).getReg();
2158     Register RHS = MI.getOperand(2).getReg();
2159     Register Neg = MRI.createGenericVirtualRegister(Ty);
2160     MIRBuilder.buildFNeg(Neg, RHS);
2161     MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
2162     MI.eraseFromParent();
2163     return Legalized;
2164   }
2165   case TargetOpcode::G_FMAD:
2166     return lowerFMad(MI);
2167   case TargetOpcode::G_INTRINSIC_ROUND:
2168     return lowerIntrinsicRound(MI);
2169   case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
2170     Register OldValRes = MI.getOperand(0).getReg();
2171     Register SuccessRes = MI.getOperand(1).getReg();
2172     Register Addr = MI.getOperand(2).getReg();
2173     Register CmpVal = MI.getOperand(3).getReg();
2174     Register NewVal = MI.getOperand(4).getReg();
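    // Lower to a plain G_ATOMIC_CMPXCHG and recompute the success flag by
    // comparing the loaded value against the expected value.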
2175     MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
2176                                   **MI.memoperands_begin());
2177     MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
2178     MI.eraseFromParent();
2179     return Legalized;
2180   }
2181   case TargetOpcode::G_LOAD:
2182   case TargetOpcode::G_SEXTLOAD:
2183   case TargetOpcode::G_ZEXTLOAD: {
2184     // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
2185     Register DstReg = MI.getOperand(0).getReg();
2186     Register PtrReg = MI.getOperand(1).getReg();
2187     LLT DstTy = MRI.getType(DstReg);
2188     auto &MMO = **MI.memoperands_begin();
2189 
2190     if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
2191       if (MI.getOpcode() == TargetOpcode::G_LOAD) {
2192         // This load needs splitting into power of 2 sized loads.
2193         if (DstTy.isVector())
2194           return UnableToLegalize;
2195         if (isPowerOf2_32(DstTy.getSizeInBits()))
2196           return UnableToLegalize; // Don't know what we're being asked to do.
2197 
2198         // Our strategy here is to generate anyextending loads for the smaller
2199         // types up to next power-2 result type, and then combine the two larger
2200         // result values together, before truncating back down to the non-pow-2
2201         // type.
2202         // E.g. v1 = i24 load =>
2203         // v2 = i32 load (2 byte)
2204         // v3 = i32 load (1 byte)
2205         // v4 = i32 shl v3, 16
2206         // v5 = i32 or v4, v2
2207         // v1 = i24 trunc v5
2208         // By doing this we generate the correct truncate which should get
2209         // combined away as an artifact with a matching extend.
2210         uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
2211         uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;
2212 
2213         MachineFunction &MF = MIRBuilder.getMF();
2214         MachineMemOperand *LargeMMO =
2215             MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
2216         MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
2217             &MMO, LargeSplitSize / 8, SmallSplitSize / 8);
2218 
2219         LLT PtrTy = MRI.getType(PtrReg);
2220         unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
2221         LLT AnyExtTy = LLT::scalar(AnyExtSize);
2222         Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
2223         Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
2224         auto LargeLoad =
2225             MIRBuilder.buildLoad(LargeLdReg, PtrReg, *LargeMMO);
2226 
2227         auto OffsetCst =
2228             MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
2229         Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
2230         auto SmallPtr =
2231             MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
2232         auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
2233                                               *SmallMMO);
2234 
2235         auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
2236         auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
2237         auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
2238         MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
2239         MI.eraseFromParent();
2240         return Legalized;
2241       }
2242       MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
2243       MI.eraseFromParent();
2244       return Legalized;
2245     }
2246 
2247     if (DstTy.isScalar()) {
2248       Register TmpReg =
2249           MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
2250       MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
2251       switch (MI.getOpcode()) {
2252       default:
2253         llvm_unreachable("Unexpected opcode");
2254       case TargetOpcode::G_LOAD:
2255         MIRBuilder.buildExtOrTrunc(TargetOpcode::G_ANYEXT, DstReg, TmpReg);
2256         break;
2257       case TargetOpcode::G_SEXTLOAD:
2258         MIRBuilder.buildSExt(DstReg, TmpReg);
2259         break;
2260       case TargetOpcode::G_ZEXTLOAD:
2261         MIRBuilder.buildZExt(DstReg, TmpReg);
2262         break;
2263       }
2264       MI.eraseFromParent();
2265       return Legalized;
2266     }
2267 
2268     return UnableToLegalize;
2269   }
2270   case TargetOpcode::G_STORE: {
2271     // Lower a non-power of 2 store into multiple pow-2 stores.
2272     // E.g. split an i24 store into an i16 store + i8 store.
2273     // We do this by first extending the stored value to the next largest power
2274     // of 2 type, and then using truncating stores to store the components.
    // By doing this, as with G_LOAD, we generate an extend that can be
    // artifact-combined away instead of leaving behind extracts.
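    // Sketch for an i24 store:
    // v2 = i32 anyext v1
    // v3 = i32 lshr v2, 16
    // i16 truncating store of v2 (2 bytes)
    // i8 truncating store of v3 (1 byte, at offset 2)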
2277     Register SrcReg = MI.getOperand(0).getReg();
2278     Register PtrReg = MI.getOperand(1).getReg();
2279     LLT SrcTy = MRI.getType(SrcReg);
2280     MachineMemOperand &MMO = **MI.memoperands_begin();
2281     if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
2282       return UnableToLegalize;
2283     if (SrcTy.isVector())
2284       return UnableToLegalize;
2285     if (isPowerOf2_32(SrcTy.getSizeInBits()))
2286       return UnableToLegalize; // Don't know what we're being asked to do.
2287 
2288     // Extend to the next pow-2.
2289     const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
2290     auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);
2291 
2292     // Obtain the smaller value by shifting away the larger value.
2293     uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
2294     uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
2295     auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
2296     auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
2297 
2298     // Generate the PtrAdd and truncating stores.
2299     LLT PtrTy = MRI.getType(PtrReg);
2300     auto OffsetCst =
2301         MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
2302     Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
2303     auto SmallPtr =
2304         MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
2305 
2306     MachineFunction &MF = MIRBuilder.getMF();
2307     MachineMemOperand *LargeMMO =
2308         MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
2309     MachineMemOperand *SmallMMO =
2310         MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
2311     MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
2312     MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
2313     MI.eraseFromParent();
2314     return Legalized;
2315   }
2316   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2317   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2318   case TargetOpcode::G_CTLZ:
2319   case TargetOpcode::G_CTTZ:
2320   case TargetOpcode::G_CTPOP:
2321     return lowerBitCount(MI, TypeIdx, Ty);
2322   case G_UADDO: {
2323     Register Res = MI.getOperand(0).getReg();
2324     Register CarryOut = MI.getOperand(1).getReg();
2325     Register LHS = MI.getOperand(2).getReg();
2326     Register RHS = MI.getOperand(3).getReg();
2327 
2328     MIRBuilder.buildAdd(Res, LHS, RHS);
2329     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
2330 
2331     MI.eraseFromParent();
2332     return Legalized;
2333   }
2334   case G_UADDE: {
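    // Expand to two adds: Res = (LHS + RHS) + zext(CarryIn), with the
    // carry-out taken from an unsigned compare of the final result against
    // LHS.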
2335     Register Res = MI.getOperand(0).getReg();
2336     Register CarryOut = MI.getOperand(1).getReg();
2337     Register LHS = MI.getOperand(2).getReg();
2338     Register RHS = MI.getOperand(3).getReg();
2339     Register CarryIn = MI.getOperand(4).getReg();
2340 
2341     Register TmpRes = MRI.createGenericVirtualRegister(Ty);
2342     Register ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);
2343 
2344     MIRBuilder.buildAdd(TmpRes, LHS, RHS);
2345     MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
2346     MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
2347     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
2348 
2349     MI.eraseFromParent();
2350     return Legalized;
2351   }
2352   case G_USUBO: {
2353     Register Res = MI.getOperand(0).getReg();
2354     Register BorrowOut = MI.getOperand(1).getReg();
2355     Register LHS = MI.getOperand(2).getReg();
2356     Register RHS = MI.getOperand(3).getReg();
2357 
2358     MIRBuilder.buildSub(Res, LHS, RHS);
2359     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
2360 
2361     MI.eraseFromParent();
2362     return Legalized;
2363   }
2364   case G_USUBE: {
2365     Register Res = MI.getOperand(0).getReg();
2366     Register BorrowOut = MI.getOperand(1).getReg();
2367     Register LHS = MI.getOperand(2).getReg();
2368     Register RHS = MI.getOperand(3).getReg();
2369     Register BorrowIn = MI.getOperand(4).getReg();
2370 
2371     Register TmpRes = MRI.createGenericVirtualRegister(Ty);
2372     Register ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty);
2373     Register LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
2374     Register LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
2375 
2376     MIRBuilder.buildSub(TmpRes, LHS, RHS);
2377     MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn);
2378     MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
2379     MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS);
2380     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS);
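    // The borrow-out equals the incoming borrow when LHS == RHS; otherwise it
    // is set exactly when LHS < RHS (unsigned).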
2381     MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
2382 
2383     MI.eraseFromParent();
2384     return Legalized;
2385   }
2386   case G_UITOFP:
2387     return lowerUITOFP(MI, TypeIdx, Ty);
2388   case G_SITOFP:
2389     return lowerSITOFP(MI, TypeIdx, Ty);
2390   case G_FPTOUI:
2391     return lowerFPTOUI(MI, TypeIdx, Ty);
2392   case G_SMIN:
2393   case G_SMAX:
2394   case G_UMIN:
2395   case G_UMAX:
2396     return lowerMinMax(MI, TypeIdx, Ty);
2397   case G_FCOPYSIGN:
2398     return lowerFCopySign(MI, TypeIdx, Ty);
2399   case G_FMINNUM:
2400   case G_FMAXNUM:
2401     return lowerFMinNumMaxNum(MI);
2402   case G_UNMERGE_VALUES:
2403     return lowerUnmergeValues(MI);
2404   case TargetOpcode::G_SEXT_INREG: {
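    // Expand by shifting the field up to the top of the register and
    // arithmetic-shifting it back down to replicate the sign bit; e.g. for an
    // s32 destination with SizeInBits == 8, both shift amounts are 24.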
2405     assert(MI.getOperand(2).isImm() && "Expected immediate");
2406     int64_t SizeInBits = MI.getOperand(2).getImm();
2407 
2408     Register DstReg = MI.getOperand(0).getReg();
2409     Register SrcReg = MI.getOperand(1).getReg();
2410     LLT DstTy = MRI.getType(DstReg);
2411     Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
2412 
    auto MIBSz = MIRBuilder.buildConstant(
        DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
2414     MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
2415     MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
2416     MI.eraseFromParent();
2417     return Legalized;
2418   }
2419   case G_SHUFFLE_VECTOR:
2420     return lowerShuffleVector(MI);
2421   case G_DYN_STACKALLOC:
2422     return lowerDynStackAlloc(MI);
2423   case G_EXTRACT:
2424     return lowerExtract(MI);
2425   case G_INSERT:
2426     return lowerInsert(MI);
2427   case G_BSWAP:
2428     return lowerBswap(MI);
2429   case G_BITREVERSE:
2430     return lowerBitreverse(MI);
2431   case G_READ_REGISTER:
2432     return lowerReadRegister(MI);
2433   }
2434 }
2435 
2436 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
2437     MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
2438   SmallVector<Register, 2> DstRegs;
2439 
2440   unsigned NarrowSize = NarrowTy.getSizeInBits();
2441   Register DstReg = MI.getOperand(0).getReg();
2442   unsigned Size = MRI.getType(DstReg).getSizeInBits();
2443   int NumParts = Size / NarrowSize;
2444   // FIXME: Don't know how to handle the situation where the small vectors
2445   // aren't all the same size yet.
2446   if (Size % NarrowSize != 0)
2447     return UnableToLegalize;
2448 
2449   for (int i = 0; i < NumParts; ++i) {
2450     Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
2451     MIRBuilder.buildUndef(TmpReg);
2452     DstRegs.push_back(TmpReg);
2453   }
2454 
2455   if (NarrowTy.isVector())
2456     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2457   else
2458     MIRBuilder.buildBuildVector(DstReg, DstRegs);
2459 
2460   MI.eraseFromParent();
2461   return Legalized;
2462 }
2463 
2464 LegalizerHelper::LegalizeResult
2465 LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
2466                                           LLT NarrowTy) {
2467   const unsigned Opc = MI.getOpcode();
2468   const unsigned NumOps = MI.getNumOperands() - 1;
2469   const unsigned NarrowSize = NarrowTy.getSizeInBits();
2470   const Register DstReg = MI.getOperand(0).getReg();
2471   const unsigned Flags = MI.getFlags();
2472   const LLT DstTy = MRI.getType(DstReg);
2473   const unsigned Size = DstTy.getSizeInBits();
2474   const int NumParts = Size / NarrowSize;
2475   const LLT EltTy = DstTy.getElementType();
2476   const unsigned EltSize = EltTy.getSizeInBits();
2477   const unsigned BitsForNumParts = NarrowSize * NumParts;
2478 
2479   // Check if we have any leftovers. If we do, then only handle the case where
2480   // the leftover is one element.
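  // E.g. splitting a <3 x s32> operation with a <2 x s32> NarrowTy leaves a
  // single s32 element as the leftover piece.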
2481   if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size)
2482     return UnableToLegalize;
2483 
2484   if (BitsForNumParts != Size) {
2485     Register AccumDstReg = MRI.createGenericVirtualRegister(DstTy);
2486     MIRBuilder.buildUndef(AccumDstReg);
2487 
2488     // Handle the pieces which evenly divide into the requested type with
2489     // extract/op/insert sequence.
2490     for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) {
2491       SmallVector<SrcOp, 4> SrcOps;
2492       for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
2493         Register PartOpReg = MRI.createGenericVirtualRegister(NarrowTy);
2494         MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I), Offset);
2495         SrcOps.push_back(PartOpReg);
2496       }
2497 
2498       Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy);
2499       MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
2500 
2501       Register PartInsertReg = MRI.createGenericVirtualRegister(DstTy);
2502       MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset);
2503       AccumDstReg = PartInsertReg;
2504     }
2505 
2506     // Handle the remaining element sized leftover piece.
2507     SmallVector<SrcOp, 4> SrcOps;
2508     for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
2509       Register PartOpReg = MRI.createGenericVirtualRegister(EltTy);
2510       MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I), BitsForNumParts);
2511       SrcOps.push_back(PartOpReg);
2512     }
2513 
2514     Register PartDstReg = MRI.createGenericVirtualRegister(EltTy);
2515     MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
2516     MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts);
2517     MI.eraseFromParent();
2518 
2519     return Legalized;
2520   }
2521 
2522   SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
2523 
2524   extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs);
2525 
2526   if (NumOps >= 2)
2527     extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs);
2528 
2529   if (NumOps >= 3)
2530     extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs);
2531 
2532   for (int i = 0; i < NumParts; ++i) {
2533     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
2534 
2535     if (NumOps == 1)
2536       MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags);
2537     else if (NumOps == 2) {
2538       MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags);
2539     } else if (NumOps == 3) {
2540       MIRBuilder.buildInstr(Opc, {DstReg},
2541                             {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags);
2542     }
2543 
2544     DstRegs.push_back(DstReg);
2545   }
2546 
2547   if (NarrowTy.isVector())
2548     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2549   else
2550     MIRBuilder.buildBuildVector(DstReg, DstRegs);
2551 
2552   MI.eraseFromParent();
2553   return Legalized;
2554 }
2555 
2556 // Handle splitting vector operations which need to have the same number of
2557 // elements in each type index, but each type index may have a different element
2558 // type.
2559 //
2560 // e.g.  <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
2561 //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2562 //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2563 //
// Also handles some irregular breakdown cases:
// e.g.  <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
2566 //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2567 //             s64 = G_SHL s64, s32
2568 LegalizerHelper::LegalizeResult
2569 LegalizerHelper::fewerElementsVectorMultiEltType(
2570   MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
2571   if (TypeIdx != 0)
2572     return UnableToLegalize;
2573 
2574   const LLT NarrowTy0 = NarrowTyArg;
2575   const unsigned NewNumElts =
2576       NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;
2577 
2578   const Register DstReg = MI.getOperand(0).getReg();
2579   LLT DstTy = MRI.getType(DstReg);
2580   LLT LeftoverTy0;
2581 
2582   // All of the operands need to have the same number of elements, so if we can
2583   // determine a type breakdown for the result type, we can for all of the
2584   // source types.
2585   int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
2586   if (NumParts < 0)
2587     return UnableToLegalize;
2588 
2589   SmallVector<MachineInstrBuilder, 4> NewInsts;
2590 
2591   SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
2592   SmallVector<Register, 4> PartRegs, LeftoverRegs;
2593 
2594   for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
2595     LLT LeftoverTy;
2596     Register SrcReg = MI.getOperand(I).getReg();
2597     LLT SrcTyI = MRI.getType(SrcReg);
2598     LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
2599     LLT LeftoverTyI;
2600 
2601     // Split this operand into the requested typed registers, and any leftover
2602     // required to reproduce the original type.
2603     if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
2604                       LeftoverRegs))
2605       return UnableToLegalize;
2606 
2607     if (I == 1) {
2608       // For the first operand, create an instruction for each part and setup
2609       // the result.
2610       for (Register PartReg : PartRegs) {
2611         Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2612         NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
2613                                .addDef(PartDstReg)
2614                                .addUse(PartReg));
2615         DstRegs.push_back(PartDstReg);
2616       }
2617 
2618       for (Register LeftoverReg : LeftoverRegs) {
2619         Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
2620         NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
2621                                .addDef(PartDstReg)
2622                                .addUse(LeftoverReg));
2623         LeftoverDstRegs.push_back(PartDstReg);
2624       }
2625     } else {
2626       assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());
2627 
2628       // Add the newly created operand splits to the existing instructions. The
2629       // odd-sized pieces are ordered after the requested NarrowTyArg sized
2630       // pieces.
2631       unsigned InstCount = 0;
2632       for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
2633         NewInsts[InstCount++].addUse(PartRegs[J]);
2634       for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
2635         NewInsts[InstCount++].addUse(LeftoverRegs[J]);
2636     }
2637 
2638     PartRegs.clear();
2639     LeftoverRegs.clear();
2640   }
2641 
2642   // Insert the newly built operations and rebuild the result register.
2643   for (auto &MIB : NewInsts)
2644     MIRBuilder.insertInstr(MIB);
2645 
2646   insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);
2647 
2648   MI.eraseFromParent();
2649   return Legalized;
2650 }
2651 
2652 LegalizerHelper::LegalizeResult
2653 LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
2654                                           LLT NarrowTy) {
2655   if (TypeIdx != 0)
2656     return UnableToLegalize;
2657 
2658   Register DstReg = MI.getOperand(0).getReg();
2659   Register SrcReg = MI.getOperand(1).getReg();
2660   LLT DstTy = MRI.getType(DstReg);
2661   LLT SrcTy = MRI.getType(SrcReg);
2662 
2663   LLT NarrowTy0 = NarrowTy;
2664   LLT NarrowTy1;
2665   unsigned NumParts;
2666 
2667   if (NarrowTy.isVector()) {
2668     // Uneven breakdown not handled.
2669     NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
2670     if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
2671       return UnableToLegalize;
2672 
2673     NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits());
2674   } else {
2675     NumParts = DstTy.getNumElements();
2676     NarrowTy1 = SrcTy.getElementType();
2677   }
2678 
2679   SmallVector<Register, 4> SrcRegs, DstRegs;
2680   extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
2681 
2682   for (unsigned I = 0; I < NumParts; ++I) {
2683     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2684     MachineInstr *NewInst =
2685         MIRBuilder.buildInstr(MI.getOpcode(), {DstReg}, {SrcRegs[I]});
2686 
2687     NewInst->setFlags(MI.getFlags());
2688     DstRegs.push_back(DstReg);
2689   }
2690 
2691   if (NarrowTy.isVector())
2692     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2693   else
2694     MIRBuilder.buildBuildVector(DstReg, DstRegs);
2695 
2696   MI.eraseFromParent();
2697   return Legalized;
2698 }
2699 
2700 LegalizerHelper::LegalizeResult
2701 LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
2702                                         LLT NarrowTy) {
2703   Register DstReg = MI.getOperand(0).getReg();
2704   Register Src0Reg = MI.getOperand(2).getReg();
2705   LLT DstTy = MRI.getType(DstReg);
2706   LLT SrcTy = MRI.getType(Src0Reg);
2707 
2708   unsigned NumParts;
2709   LLT NarrowTy0, NarrowTy1;
2710 
2711   if (TypeIdx == 0) {
2712     unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
2713     unsigned OldElts = DstTy.getNumElements();
2714 
2715     NarrowTy0 = NarrowTy;
    NumParts =
        NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
2717     NarrowTy1 = NarrowTy.isVector() ?
2718       LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
2719       SrcTy.getElementType();
2720 
2721   } else {
2722     unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
2723     unsigned OldElts = SrcTy.getNumElements();
2724 
2725     NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
2726       NarrowTy.getNumElements();
2727     NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
2728                             DstTy.getScalarSizeInBits());
2729     NarrowTy1 = NarrowTy;
2730   }
2731 
2732   // FIXME: Don't know how to handle the situation where the small vectors
2733   // aren't all the same size yet.
2734   if (NarrowTy1.isVector() &&
2735       NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
2736     return UnableToLegalize;
2737 
2738   CmpInst::Predicate Pred
2739     = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
2740 
2741   SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
2742   extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
2743   extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);
2744 
2745   for (unsigned I = 0; I < NumParts; ++I) {
2746     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2747     DstRegs.push_back(DstReg);
2748 
2749     if (MI.getOpcode() == TargetOpcode::G_ICMP)
2750       MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
2751     else {
2752       MachineInstr *NewCmp
2753         = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
2754       NewCmp->setFlags(MI.getFlags());
2755     }
2756   }
2757 
2758   if (NarrowTy1.isVector())
2759     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2760   else
2761     MIRBuilder.buildBuildVector(DstReg, DstRegs);
2762 
2763   MI.eraseFromParent();
2764   return Legalized;
2765 }
2766 
2767 LegalizerHelper::LegalizeResult
2768 LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
2769                                            LLT NarrowTy) {
2770   Register DstReg = MI.getOperand(0).getReg();
2771   Register CondReg = MI.getOperand(1).getReg();
2772 
2773   unsigned NumParts = 0;
2774   LLT NarrowTy0, NarrowTy1;
2775 
2776   LLT DstTy = MRI.getType(DstReg);
2777   LLT CondTy = MRI.getType(CondReg);
2778   unsigned Size = DstTy.getSizeInBits();
2779 
2780   assert(TypeIdx == 0 || CondTy.isVector());
2781 
2782   if (TypeIdx == 0) {
2783     NarrowTy0 = NarrowTy;
2784     NarrowTy1 = CondTy;
2785 
2786     unsigned NarrowSize = NarrowTy0.getSizeInBits();
2787     // FIXME: Don't know how to handle the situation where the small vectors
2788     // aren't all the same size yet.
2789     if (Size % NarrowSize != 0)
2790       return UnableToLegalize;
2791 
2792     NumParts = Size / NarrowSize;
2793 
2794     // Need to break down the condition type
2795     if (CondTy.isVector()) {
2796       if (CondTy.getNumElements() == NumParts)
2797         NarrowTy1 = CondTy.getElementType();
2798       else
2799         NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
2800                                 CondTy.getScalarSizeInBits());
2801     }
2802   } else {
2803     NumParts = CondTy.getNumElements();
    if (NarrowTy.isVector()) {
      // TODO: Handle breaking down the condition vector; neither the even nor
      // the uneven element-count case is implemented yet.
      return UnableToLegalize;
    } else {
2811       NarrowTy0 = DstTy.getElementType();
2812       NarrowTy1 = NarrowTy;
2813     }
2814   }
2815 
2816   SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
2817   if (CondTy.isVector())
2818     extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
2819 
2820   extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
2821   extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
2822 
2823   for (unsigned i = 0; i < NumParts; ++i) {
2824     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2825     MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
2826                            Src1Regs[i], Src2Regs[i]);
2827     DstRegs.push_back(DstReg);
2828   }
2829 
2830   if (NarrowTy0.isVector())
2831     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2832   else
2833     MIRBuilder.buildBuildVector(DstReg, DstRegs);
2834 
2835   MI.eraseFromParent();
2836   return Legalized;
2837 }
2838 
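// Break a vector G_PHI into NarrowTy-sized pieces (plus one leftover piece if
// the breakdown is uneven). The new G_PHIs are created in the result block
// first, and the incoming values are then split with extracts inserted just
// before the terminator of each predecessor block.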
2839 LegalizerHelper::LegalizeResult
2840 LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
2841                                         LLT NarrowTy) {
2842   const Register DstReg = MI.getOperand(0).getReg();
2843   LLT PhiTy = MRI.getType(DstReg);
2844   LLT LeftoverTy;
2845 
2846   // All of the operands need to have the same number of elements, so if we can
2847   // determine a type breakdown for the result type, we can for all of the
2848   // source types.
2849   int NumParts, NumLeftover;
2850   std::tie(NumParts, NumLeftover)
2851     = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
2852   if (NumParts < 0)
2853     return UnableToLegalize;
2854 
2855   SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
2856   SmallVector<MachineInstrBuilder, 4> NewInsts;
2857 
2858   const int TotalNumParts = NumParts + NumLeftover;
2859 
2860   // Insert the new phis in the result block first.
2861   for (int I = 0; I != TotalNumParts; ++I) {
2862     LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
2863     Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
2864     NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
2865                        .addDef(PartDstReg));
2866     if (I < NumParts)
2867       DstRegs.push_back(PartDstReg);
2868     else
2869       LeftoverDstRegs.push_back(PartDstReg);
2870   }
2871 
2872   MachineBasicBlock *MBB = MI.getParent();
2873   MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
2874   insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);
2875 
2876   SmallVector<Register, 4> PartRegs, LeftoverRegs;
2877 
2878   // Insert code to extract the incoming values in each predecessor block.
2879   for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
2880     PartRegs.clear();
2881     LeftoverRegs.clear();
2882 
2883     Register SrcReg = MI.getOperand(I).getReg();
2884     MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2885     MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
2886 
2887     LLT Unused;
2888     if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
2889                       LeftoverRegs))
2890       return UnableToLegalize;
2891 
    // Add the newly created operand splits to the existing instructions. The
    // odd-sized pieces are ordered after the requested NarrowTy-sized pieces.
2895     for (int J = 0; J != TotalNumParts; ++J) {
2896       MachineInstrBuilder MIB = NewInsts[J];
2897       MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
2898       MIB.addMBB(&OpMBB);
2899     }
2900   }
2901 
2902   MI.eraseFromParent();
2903   return Legalized;
2904 }
2905 
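// Split a G_UNMERGE_VALUES whose source type is too wide by first unmerging
// the source into GCD-typed pieces and then unmerging each piece into the
// original destinations. Illustrative sketch (assuming an even breakdown):
//
//   %1:_(s32), %2:_(s32), %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %0:_(s128)
//     -> (NarrowTy = s64)
//   %5:_(s64), %6:_(s64) = G_UNMERGE_VALUES %0
//   %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %5
//   %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %6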
2906 LegalizerHelper::LegalizeResult
2907 LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
2908                                                   unsigned TypeIdx,
2909                                                   LLT NarrowTy) {
2910   if (TypeIdx != 1)
2911     return UnableToLegalize;
2912 
2913   const int NumDst = MI.getNumOperands() - 1;
2914   const Register SrcReg = MI.getOperand(NumDst).getReg();
2915   LLT SrcTy = MRI.getType(SrcReg);
2916 
2917   LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2918 
2919   // TODO: Create sequence of extracts.
2920   if (DstTy == NarrowTy)
2921     return UnableToLegalize;
2922 
2923   LLT GCDTy = getGCDType(SrcTy, NarrowTy);
2924   if (DstTy == GCDTy) {
2925     // This would just be a copy of the same unmerge.
2926     // TODO: Create extracts, pad with undef and create intermediate merges.
2927     return UnableToLegalize;
2928   }
2929 
2930   auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2931   const int NumUnmerge = Unmerge->getNumOperands() - 1;
2932   const int PartsPerUnmerge = NumDst / NumUnmerge;
2933 
2934   for (int I = 0; I != NumUnmerge; ++I) {
2935     auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2936 
2937     for (int J = 0; J != PartsPerUnmerge; ++J)
2938       MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
2939     MIB.addUse(Unmerge.getReg(I));
2940   }
2941 
2942   MI.eraseFromParent();
2943   return Legalized;
2944 }
2945 
2946 LegalizerHelper::LegalizeResult
2947 LegalizerHelper::fewerElementsVectorBuildVector(MachineInstr &MI,
2948                                                 unsigned TypeIdx,
2949                                                 LLT NarrowTy) {
2950   assert(TypeIdx == 0 && "not a vector type index");
2951   Register DstReg = MI.getOperand(0).getReg();
2952   LLT DstTy = MRI.getType(DstReg);
2953   LLT SrcTy = DstTy.getElementType();
2954 
2955   int DstNumElts = DstTy.getNumElements();
2956   int NarrowNumElts = NarrowTy.getNumElements();
2957   int NumConcat = (DstNumElts + NarrowNumElts - 1) / NarrowNumElts;
2958   LLT WidenedDstTy = LLT::vector(NarrowNumElts * NumConcat, SrcTy);
2959 
2960   SmallVector<Register, 8> ConcatOps;
2961   SmallVector<Register, 8> SubBuildVector;
2962 
2963   Register UndefReg;
2964   if (WidenedDstTy != DstTy)
2965     UndefReg = MIRBuilder.buildUndef(SrcTy).getReg(0);
2966 
2967   // Create a G_CONCAT_VECTORS of NarrowTy pieces, padding with undef as
2968   // necessary.
2969   //
2970   // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
2971   //   -> <2 x s16>
2972   //
2973   // %4:_(s16) = G_IMPLICIT_DEF
2974   // %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
2975   // %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
2976   // %7:_(<4 x s16>) = G_CONCAT_VECTORS %5, %6
2977   // %3:_(<3 x s16>) = G_EXTRACT %7, 0
2978   for (int I = 0; I != NumConcat; ++I) {
2979     for (int J = 0; J != NarrowNumElts; ++J) {
2980       int SrcIdx = NarrowNumElts * I + J;
2981 
2982       if (SrcIdx < DstNumElts) {
2983         Register SrcReg = MI.getOperand(SrcIdx + 1).getReg();
2984         SubBuildVector.push_back(SrcReg);
2985       } else
2986         SubBuildVector.push_back(UndefReg);
2987     }
2988 
2989     auto BuildVec = MIRBuilder.buildBuildVector(NarrowTy, SubBuildVector);
2990     ConcatOps.push_back(BuildVec.getReg(0));
2991     SubBuildVector.clear();
2992   }
2993 
2994   if (DstTy == WidenedDstTy)
2995     MIRBuilder.buildConcatVectors(DstReg, ConcatOps);
2996   else {
2997     auto Concat = MIRBuilder.buildConcatVectors(WidenedDstTy, ConcatOps);
2998     MIRBuilder.buildExtract(DstReg, Concat, 0);
2999   }
3000 
3001   MI.eraseFromParent();
3002   return Legalized;
3003 }
3004 
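// Split a wide G_LOAD or G_STORE into NarrowTy-sized memory accesses, plus one
// access of a leftover type if NarrowTy does not evenly cover the value. For
// example (a sketch, not target-specific):
//
//   %0:_(s96) = G_LOAD %p(p0) :: (load 12)
//     -> (NarrowTy = s32)
//   three s32 loads at byte offsets 0, 4 and 8, merged back into %0.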
3005 LegalizerHelper::LegalizeResult
3006 LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
3007                                       LLT NarrowTy) {
3008   // FIXME: Don't know how to handle secondary types yet.
3009   if (TypeIdx != 0)
3010     return UnableToLegalize;
3011 
3012   MachineMemOperand *MMO = *MI.memoperands_begin();
3013 
3014   // This implementation doesn't work for atomics. Give up instead of doing
3015   // something invalid.
3016   if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
3017       MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
3018     return UnableToLegalize;
3019 
3020   bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
3021   Register ValReg = MI.getOperand(0).getReg();
3022   Register AddrReg = MI.getOperand(1).getReg();
3023   LLT ValTy = MRI.getType(ValReg);
3024 
3025   int NumParts = -1;
3026   int NumLeftover = -1;
3027   LLT LeftoverTy;
3028   SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
3029   if (IsLoad) {
3030     std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
3031   } else {
3032     if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
3033                      NarrowLeftoverRegs)) {
3034       NumParts = NarrowRegs.size();
3035       NumLeftover = NarrowLeftoverRegs.size();
3036     }
3037   }
3038 
3039   if (NumParts == -1)
3040     return UnableToLegalize;
3041 
3042   const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
3043 
3044   unsigned TotalSize = ValTy.getSizeInBits();
3045 
  // Split the load/store into PartTy-sized pieces starting at Offset. If this
  // is a load, return the new registers in ValRegs. For a store, each element
  // of ValRegs should already be of type PartTy. Returns the next offset that
  // needs to be handled.
3050   auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
3051                              unsigned Offset) -> unsigned {
3052     MachineFunction &MF = MIRBuilder.getMF();
3053     unsigned PartSize = PartTy.getSizeInBits();
3054     for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
3055          Offset += PartSize, ++Idx) {
3056       unsigned ByteSize = PartSize / 8;
3057       unsigned ByteOffset = Offset / 8;
3058       Register NewAddrReg;
3059 
3060       MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
3061 
3062       MachineMemOperand *NewMMO =
3063         MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);
3064 
3065       if (IsLoad) {
3066         Register Dst = MRI.createGenericVirtualRegister(PartTy);
3067         ValRegs.push_back(Dst);
3068         MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
3069       } else {
3070         MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
3071       }
3072     }
3073 
3074     return Offset;
3075   };
3076 
3077   unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);
3078 
3079   // Handle the rest of the register if this isn't an even type breakdown.
3080   if (LeftoverTy.isValid())
3081     splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);
3082 
3083   if (IsLoad) {
3084     insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
3085                 LeftoverTy, NarrowLeftoverRegs);
3086   }
3087 
3088   MI.eraseFromParent();
3089   return Legalized;
3090 }
3091 
3092 LegalizerHelper::LegalizeResult
3093 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
3094                                      LLT NarrowTy) {
3095   using namespace TargetOpcode;
3096 
3097   MIRBuilder.setInstr(MI);
3098   switch (MI.getOpcode()) {
3099   case G_IMPLICIT_DEF:
3100     return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
3101   case G_AND:
3102   case G_OR:
3103   case G_XOR:
3104   case G_ADD:
3105   case G_SUB:
3106   case G_MUL:
3107   case G_SMULH:
3108   case G_UMULH:
3109   case G_FADD:
3110   case G_FMUL:
3111   case G_FSUB:
3112   case G_FNEG:
3113   case G_FABS:
3114   case G_FCANONICALIZE:
3115   case G_FDIV:
3116   case G_FREM:
3117   case G_FMA:
3118   case G_FMAD:
3119   case G_FPOW:
3120   case G_FEXP:
3121   case G_FEXP2:
3122   case G_FLOG:
3123   case G_FLOG2:
3124   case G_FLOG10:
3125   case G_FNEARBYINT:
3126   case G_FCEIL:
3127   case G_FFLOOR:
3128   case G_FRINT:
3129   case G_INTRINSIC_ROUND:
3130   case G_INTRINSIC_TRUNC:
3131   case G_FCOS:
3132   case G_FSIN:
3133   case G_FSQRT:
3134   case G_BSWAP:
3135   case G_BITREVERSE:
3136   case G_SDIV:
3137   case G_UDIV:
3138   case G_SREM:
3139   case G_UREM:
3140   case G_SMIN:
3141   case G_SMAX:
3142   case G_UMIN:
3143   case G_UMAX:
3144   case G_FMINNUM:
3145   case G_FMAXNUM:
3146   case G_FMINNUM_IEEE:
3147   case G_FMAXNUM_IEEE:
3148   case G_FMINIMUM:
3149   case G_FMAXIMUM:
3150     return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
3151   case G_SHL:
3152   case G_LSHR:
3153   case G_ASHR:
3154   case G_CTLZ:
3155   case G_CTLZ_ZERO_UNDEF:
3156   case G_CTTZ:
3157   case G_CTTZ_ZERO_UNDEF:
3158   case G_CTPOP:
3159   case G_FCOPYSIGN:
3160     return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
3161   case G_ZEXT:
3162   case G_SEXT:
3163   case G_ANYEXT:
3164   case G_FPEXT:
3165   case G_FPTRUNC:
3166   case G_SITOFP:
3167   case G_UITOFP:
3168   case G_FPTOSI:
3169   case G_FPTOUI:
3170   case G_INTTOPTR:
3171   case G_PTRTOINT:
3172   case G_ADDRSPACE_CAST:
3173     return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
3174   case G_ICMP:
3175   case G_FCMP:
3176     return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
3177   case G_SELECT:
3178     return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
3179   case G_PHI:
3180     return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
3181   case G_UNMERGE_VALUES:
3182     return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
3183   case G_BUILD_VECTOR:
3184     return fewerElementsVectorBuildVector(MI, TypeIdx, NarrowTy);
3185   case G_LOAD:
3186   case G_STORE:
3187     return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
3188   default:
3189     return UnableToLegalize;
3190   }
3191 }
3192 
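// Expand a shift by a compile-time-constant amount over a value that has been
// split into two HalfTy halves (InL, InH). Depending on whether the amount is
// zero, less than, equal to, or greater than the half width, the result halves
// are formed from shifted and or'd combinations of the input halves, similar
// in spirit to SelectionDAG's constant-shift expansion. For example, a 64-bit
// G_SHL by 40 with 32-bit halves produces:
//   Lo = 0
//   Hi = InL << 8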
3193 LegalizerHelper::LegalizeResult
3194 LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
3195                                              const LLT HalfTy, const LLT AmtTy) {
3196 
3197   Register InL = MRI.createGenericVirtualRegister(HalfTy);
3198   Register InH = MRI.createGenericVirtualRegister(HalfTy);
3199   MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
3200 
3201   if (Amt.isNullValue()) {
3202     MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH});
3203     MI.eraseFromParent();
3204     return Legalized;
3205   }
3206 
3207   LLT NVT = HalfTy;
3208   unsigned NVTBits = HalfTy.getSizeInBits();
3209   unsigned VTBits = 2 * NVTBits;
3210 
3211   SrcOp Lo(Register(0)), Hi(Register(0));
3212   if (MI.getOpcode() == TargetOpcode::G_SHL) {
3213     if (Amt.ugt(VTBits)) {
3214       Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
3215     } else if (Amt.ugt(NVTBits)) {
3216       Lo = MIRBuilder.buildConstant(NVT, 0);
3217       Hi = MIRBuilder.buildShl(NVT, InL,
3218                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
3219     } else if (Amt == NVTBits) {
3220       Lo = MIRBuilder.buildConstant(NVT, 0);
3221       Hi = InL;
3222     } else {
3223       Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
3224       auto OrLHS =
3225           MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
3226       auto OrRHS = MIRBuilder.buildLShr(
3227           NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
3228       Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
3229     }
3230   } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
3231     if (Amt.ugt(VTBits)) {
3232       Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
3233     } else if (Amt.ugt(NVTBits)) {
3234       Lo = MIRBuilder.buildLShr(NVT, InH,
3235                                 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
3236       Hi = MIRBuilder.buildConstant(NVT, 0);
3237     } else if (Amt == NVTBits) {
3238       Lo = InH;
3239       Hi = MIRBuilder.buildConstant(NVT, 0);
3240     } else {
3241       auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
3242 
3243       auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
3244       auto OrRHS = MIRBuilder.buildShl(
3245           NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
3246 
3247       Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
3248       Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
3249     }
3250   } else {
3251     if (Amt.ugt(VTBits)) {
3252       Hi = Lo = MIRBuilder.buildAShr(
3253           NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
3254     } else if (Amt.ugt(NVTBits)) {
3255       Lo = MIRBuilder.buildAShr(NVT, InH,
3256                                 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
3257       Hi = MIRBuilder.buildAShr(NVT, InH,
3258                                 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
3259     } else if (Amt == NVTBits) {
3260       Lo = InH;
3261       Hi = MIRBuilder.buildAShr(NVT, InH,
3262                                 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
3263     } else {
3264       auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
3265 
3266       auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
3267       auto OrRHS = MIRBuilder.buildShl(
3268           NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
3269 
3270       Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
3271       Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
3272     }
3273   }
3274 
3275   MIRBuilder.buildMerge(MI.getOperand(0), {Lo.getReg(), Hi.getReg()});
3276   MI.eraseFromParent();
3277 
3278   return Legalized;
3279 }
3280 
3281 // TODO: Optimize if constant shift amount.
3282 LegalizerHelper::LegalizeResult
3283 LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
3284                                    LLT RequestedTy) {
3285   if (TypeIdx == 1) {
3286     Observer.changingInstr(MI);
3287     narrowScalarSrc(MI, RequestedTy, 2);
3288     Observer.changedInstr(MI);
3289     return Legalized;
3290   }
3291 
3292   Register DstReg = MI.getOperand(0).getReg();
3293   LLT DstTy = MRI.getType(DstReg);
3294   if (DstTy.isVector())
3295     return UnableToLegalize;
3296 
3297   Register Amt = MI.getOperand(2).getReg();
3298   LLT ShiftAmtTy = MRI.getType(Amt);
3299   const unsigned DstEltSize = DstTy.getScalarSizeInBits();
3300   if (DstEltSize % 2 != 0)
3301     return UnableToLegalize;
3302 
3303   // Ignore the input type. We can only go to exactly half the size of the
3304   // input. If that isn't small enough, the resulting pieces will be further
3305   // legalized.
3306   const unsigned NewBitSize = DstEltSize / 2;
3307   const LLT HalfTy = LLT::scalar(NewBitSize);
3308   const LLT CondTy = LLT::scalar(1);
3309 
3310   if (const MachineInstr *KShiftAmt =
3311           getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
3312     return narrowScalarShiftByConstant(
3313         MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
3314   }
3315 
3316   // TODO: Expand with known bits.
3317 
3318   // Handle the fully general expansion by an unknown amount.
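  // The strategy below computes both candidate results: one assuming the
  // shift amount is less than the new (half) bit width ("short") and one
  // assuming it is greater or equal ("long"), then selects between them with
  // the IsShort/IsZero compares. The zero-amount case needs its own select
  // because a short shift of 0 would otherwise shift by NewBitSize in the
  // cross-half term.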
3319   auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
3320 
3321   Register InL = MRI.createGenericVirtualRegister(HalfTy);
3322   Register InH = MRI.createGenericVirtualRegister(HalfTy);
3323   MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
3324 
3325   auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
3326   auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
3327 
3328   auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
3329   auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
3330   auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
3331 
3332   Register ResultRegs[2];
3333   switch (MI.getOpcode()) {
3334   case TargetOpcode::G_SHL: {
3335     // Short: ShAmt < NewBitSize
3336     auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
3337 
3338     auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
3339     auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
3340     auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
3341 
3342     // Long: ShAmt >= NewBitSize
3343     auto LoL = MIRBuilder.buildConstant(HalfTy, 0);         // Lo part is zero.
3344     auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
3345 
3346     auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
3347     auto Hi = MIRBuilder.buildSelect(
3348         HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
3349 
3350     ResultRegs[0] = Lo.getReg(0);
3351     ResultRegs[1] = Hi.getReg(0);
3352     break;
3353   }
3354   case TargetOpcode::G_LSHR:
3355   case TargetOpcode::G_ASHR: {
3356     // Short: ShAmt < NewBitSize
3357     auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
3358 
3359     auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
3360     auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
3361     auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
3362 
3363     // Long: ShAmt >= NewBitSize
3364     MachineInstrBuilder HiL;
3365     if (MI.getOpcode() == TargetOpcode::G_LSHR) {
3366       HiL = MIRBuilder.buildConstant(HalfTy, 0);            // Hi part is zero.
3367     } else {
3368       auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
3369       HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt);    // Sign of Hi part.
3370     }
3371     auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
3372                                      {InH, AmtExcess});     // Lo from Hi part.
3373 
3374     auto Lo = MIRBuilder.buildSelect(
3375         HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
3376 
3377     auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
3378 
3379     ResultRegs[0] = Lo.getReg(0);
3380     ResultRegs[1] = Hi.getReg(0);
3381     break;
3382   }
3383   default:
3384     llvm_unreachable("not a shift");
3385   }
3386 
3387   MIRBuilder.buildMerge(DstReg, ResultRegs);
3388   MI.eraseFromParent();
3389   return Legalized;
3390 }
3391 
3392 LegalizerHelper::LegalizeResult
3393 LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
3394                                        LLT MoreTy) {
3395   assert(TypeIdx == 0 && "Expecting only Idx 0");
3396 
3397   Observer.changingInstr(MI);
3398   for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
3399     MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3400     MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
3401     moreElementsVectorSrc(MI, MoreTy, I);
3402   }
3403 
3404   MachineBasicBlock &MBB = *MI.getParent();
3405   MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
3406   moreElementsVectorDst(MI, MoreTy, 0);
3407   Observer.changedInstr(MI);
3408   return Legalized;
3409 }
3410 
3411 LegalizerHelper::LegalizeResult
3412 LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
3413                                     LLT MoreTy) {
3414   MIRBuilder.setInstr(MI);
3415   unsigned Opc = MI.getOpcode();
3416   switch (Opc) {
3417   case TargetOpcode::G_IMPLICIT_DEF:
3418   case TargetOpcode::G_LOAD: {
3419     if (TypeIdx != 0)
3420       return UnableToLegalize;
3421     Observer.changingInstr(MI);
3422     moreElementsVectorDst(MI, MoreTy, 0);
3423     Observer.changedInstr(MI);
3424     return Legalized;
3425   }
3426   case TargetOpcode::G_STORE:
3427     if (TypeIdx != 0)
3428       return UnableToLegalize;
3429     Observer.changingInstr(MI);
3430     moreElementsVectorSrc(MI, MoreTy, 0);
3431     Observer.changedInstr(MI);
3432     return Legalized;
3433   case TargetOpcode::G_AND:
3434   case TargetOpcode::G_OR:
3435   case TargetOpcode::G_XOR:
3436   case TargetOpcode::G_SMIN:
3437   case TargetOpcode::G_SMAX:
3438   case TargetOpcode::G_UMIN:
3439   case TargetOpcode::G_UMAX:
3440   case TargetOpcode::G_FMINNUM:
3441   case TargetOpcode::G_FMAXNUM:
3442   case TargetOpcode::G_FMINNUM_IEEE:
3443   case TargetOpcode::G_FMAXNUM_IEEE:
3444   case TargetOpcode::G_FMINIMUM:
3445   case TargetOpcode::G_FMAXIMUM: {
3446     Observer.changingInstr(MI);
3447     moreElementsVectorSrc(MI, MoreTy, 1);
3448     moreElementsVectorSrc(MI, MoreTy, 2);
3449     moreElementsVectorDst(MI, MoreTy, 0);
3450     Observer.changedInstr(MI);
3451     return Legalized;
3452   }
3453   case TargetOpcode::G_EXTRACT:
3454     if (TypeIdx != 1)
3455       return UnableToLegalize;
3456     Observer.changingInstr(MI);
3457     moreElementsVectorSrc(MI, MoreTy, 1);
3458     Observer.changedInstr(MI);
3459     return Legalized;
3460   case TargetOpcode::G_INSERT:
3461     if (TypeIdx != 0)
3462       return UnableToLegalize;
3463     Observer.changingInstr(MI);
3464     moreElementsVectorSrc(MI, MoreTy, 1);
3465     moreElementsVectorDst(MI, MoreTy, 0);
3466     Observer.changedInstr(MI);
3467     return Legalized;
3468   case TargetOpcode::G_SELECT:
3469     if (TypeIdx != 0)
3470       return UnableToLegalize;
3471     if (MRI.getType(MI.getOperand(1).getReg()).isVector())
3472       return UnableToLegalize;
3473 
3474     Observer.changingInstr(MI);
3475     moreElementsVectorSrc(MI, MoreTy, 2);
3476     moreElementsVectorSrc(MI, MoreTy, 3);
3477     moreElementsVectorDst(MI, MoreTy, 0);
3478     Observer.changedInstr(MI);
3479     return Legalized;
3480   case TargetOpcode::G_UNMERGE_VALUES: {
3481     if (TypeIdx != 1)
3482       return UnableToLegalize;
3483 
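    // Widen the source vector to MoreTy, then rewrite the unmerge to produce
    // extra results of DstTy; the original destinations keep their registers
    // and the padding results are defined into fresh, unused registers.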
3484     LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3485     int NumDst = MI.getNumOperands() - 1;
3486     moreElementsVectorSrc(MI, MoreTy, NumDst);
3487 
3488     auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
3489     for (int I = 0; I != NumDst; ++I)
3490       MIB.addDef(MI.getOperand(I).getReg());
3491 
3492     int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits();
3493     for (int I = NumDst; I != NewNumDst; ++I)
3494       MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
3495 
3496     MIB.addUse(MI.getOperand(NumDst).getReg());
3497     MI.eraseFromParent();
3498     return Legalized;
3499   }
3500   case TargetOpcode::G_PHI:
3501     return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
3502   default:
3503     return UnableToLegalize;
3504   }
3505 }
3506 
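// Multiply two values that have been split into NarrowTy-sized parts using the
// standard schoolbook algorithm: the part at DstIdx accumulates the low halves
// of all Src1[i] * Src2[j] products with i + j == DstIdx, the high halves
// (G_UMULH) of the products contributing to column DstIdx - 1, and the carry
// out of the previous column. For a 2-part multiply this is, roughly:
//
//   Dst[0] = lo(Src1[0] * Src2[0])
//   Dst[1] = lo(Src1[1] * Src2[0]) + lo(Src1[0] * Src2[1])
//            + hi(Src1[0] * Src2[0])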
3507 void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
3508                                         ArrayRef<Register> Src1Regs,
3509                                         ArrayRef<Register> Src2Regs,
3510                                         LLT NarrowTy) {
3511   MachineIRBuilder &B = MIRBuilder;
3512   unsigned SrcParts = Src1Regs.size();
3513   unsigned DstParts = DstRegs.size();
3514 
3515   unsigned DstIdx = 0; // Low bits of the result.
3516   Register FactorSum =
3517       B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
3518   DstRegs[DstIdx] = FactorSum;
3519 
3520   unsigned CarrySumPrevDstIdx;
3521   SmallVector<Register, 4> Factors;
3522 
3523   for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
3524     // Collect low parts of muls for DstIdx.
3525     for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
3526          i <= std::min(DstIdx, SrcParts - 1); ++i) {
3527       MachineInstrBuilder Mul =
3528           B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
3529       Factors.push_back(Mul.getReg(0));
3530     }
3531     // Collect high parts of muls from previous DstIdx.
3532     for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
3533          i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
3534       MachineInstrBuilder Umulh =
3535           B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
3536       Factors.push_back(Umulh.getReg(0));
3537     }
3538     // Add CarrySum from additions calculated for previous DstIdx.
3539     if (DstIdx != 1) {
3540       Factors.push_back(CarrySumPrevDstIdx);
3541     }
3542 
3543     Register CarrySum;
3544     // Add all factors and accumulate all carries into CarrySum.
3545     if (DstIdx != DstParts - 1) {
3546       MachineInstrBuilder Uaddo =
3547           B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
3548       FactorSum = Uaddo.getReg(0);
3549       CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
3550       for (unsigned i = 2; i < Factors.size(); ++i) {
3551         MachineInstrBuilder Uaddo =
3552             B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
3553         FactorSum = Uaddo.getReg(0);
3554         MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
3555         CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
3556       }
3557     } else {
      // Since the value for the next index is not calculated, neither is
      // CarrySum.
3559       FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
3560       for (unsigned i = 2; i < Factors.size(); ++i)
3561         FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
3562     }
3563 
3564     CarrySumPrevDstIdx = CarrySum;
3565     DstRegs[DstIdx] = FactorSum;
3566     Factors.clear();
3567   }
3568 }
3569 
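// Narrow G_MUL and G_UMULH by splitting both operands into NarrowTy-sized
// parts and multiplying them with multiplyRegisters. For a plain multiply only
// the low NumDstParts parts are computed; for G_UMULH twice as many parts are
// computed and only the high half is merged into the result.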
3570 LegalizerHelper::LegalizeResult
3571 LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
3572   Register DstReg = MI.getOperand(0).getReg();
3573   Register Src1 = MI.getOperand(1).getReg();
3574   Register Src2 = MI.getOperand(2).getReg();
3575 
3576   LLT Ty = MRI.getType(DstReg);
3577   if (Ty.isVector())
3578     return UnableToLegalize;
3579 
3580   unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
3581   unsigned DstSize = Ty.getSizeInBits();
3582   unsigned NarrowSize = NarrowTy.getSizeInBits();
3583   if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
3584     return UnableToLegalize;
3585 
3586   unsigned NumDstParts = DstSize / NarrowSize;
3587   unsigned NumSrcParts = SrcSize / NarrowSize;
3588   bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
3589   unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);
3590 
3591   SmallVector<Register, 2> Src1Parts, Src2Parts, DstTmpRegs;
3592   extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
3593   extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
3594   DstTmpRegs.resize(DstTmpParts);
3595   multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
3596 
  // Take only the high half of the registers if this is a high multiply.
3598   ArrayRef<Register> DstRegs(
3599       IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
3600   MIRBuilder.buildMerge(DstReg, DstRegs);
3601   MI.eraseFromParent();
3602   return Legalized;
3603 }
3604 
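// Narrow the wide source of a G_EXTRACT: split the source into NarrowTy
// pieces, keep only the pieces that overlap the extracted range (emitting a
// smaller G_EXTRACT for partially overlapping pieces), and merge them back
// into the destination.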
3605 LegalizerHelper::LegalizeResult
3606 LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
3607                                      LLT NarrowTy) {
3608   if (TypeIdx != 1)
3609     return UnableToLegalize;
3610 
3611   uint64_t NarrowSize = NarrowTy.getSizeInBits();
3612 
3613   int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
3614   // FIXME: add support for when SizeOp1 isn't an exact multiple of
3615   // NarrowSize.
3616   if (SizeOp1 % NarrowSize != 0)
3617     return UnableToLegalize;
3618   int NumParts = SizeOp1 / NarrowSize;
3619 
3620   SmallVector<Register, 2> SrcRegs, DstRegs;
3621   SmallVector<uint64_t, 2> Indexes;
3622   extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
3623 
3624   Register OpReg = MI.getOperand(0).getReg();
3625   uint64_t OpStart = MI.getOperand(2).getImm();
3626   uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
3627   for (int i = 0; i < NumParts; ++i) {
3628     unsigned SrcStart = i * NarrowSize;
3629 
3630     if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
3631       // No part of the extract uses this subregister, ignore it.
3632       continue;
3633     } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
3634       // The entire subregister is extracted, forward the value.
3635       DstRegs.push_back(SrcRegs[i]);
3636       continue;
3637     }
3638 
    // Figure out how much of this source piece overlaps the requested extract
    // range: the offset into SrcRegs[i] to extract from and the size of the
    // overlapping segment.
3641     int64_t ExtractOffset;
3642     uint64_t SegSize;
3643     if (OpStart < SrcStart) {
3644       ExtractOffset = 0;
3645       SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
3646     } else {
3647       ExtractOffset = OpStart - SrcStart;
3648       SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
3649     }
3650 
3651     Register SegReg = SrcRegs[i];
3652     if (ExtractOffset != 0 || SegSize != NarrowSize) {
3653       // A genuine extract is needed.
3654       SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
3655       MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
3656     }
3657 
3658     DstRegs.push_back(SegReg);
3659   }
3660 
3661   Register DstReg = MI.getOperand(0).getReg();
  if (MRI.getType(DstReg).isVector())
3663     MIRBuilder.buildBuildVector(DstReg, DstRegs);
3664   else
3665     MIRBuilder.buildMerge(DstReg, DstRegs);
3666   MI.eraseFromParent();
3667   return Legalized;
3668 }
3669 
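// Narrow the destination of a G_INSERT: split the wide source into NarrowTy
// pieces and, for each piece that overlaps the inserted value, insert the
// corresponding segment of the value (extracting a sub-segment first if only
// part of it lands in this piece); untouched pieces are forwarded unchanged.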
3670 LegalizerHelper::LegalizeResult
3671 LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
3672                                     LLT NarrowTy) {
3673   // FIXME: Don't know how to handle secondary types yet.
3674   if (TypeIdx != 0)
3675     return UnableToLegalize;
3676 
3677   uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
3678   uint64_t NarrowSize = NarrowTy.getSizeInBits();
3679 
3680   // FIXME: add support for when SizeOp0 isn't an exact multiple of
3681   // NarrowSize.
3682   if (SizeOp0 % NarrowSize != 0)
3683     return UnableToLegalize;
3684 
3685   int NumParts = SizeOp0 / NarrowSize;
3686 
3687   SmallVector<Register, 2> SrcRegs, DstRegs;
3688   SmallVector<uint64_t, 2> Indexes;
3689   extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
3690 
3691   Register OpReg = MI.getOperand(2).getReg();
3692   uint64_t OpStart = MI.getOperand(3).getImm();
3693   uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
3694   for (int i = 0; i < NumParts; ++i) {
3695     unsigned DstStart = i * NarrowSize;
3696 
3697     if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
3698       // No part of the insert affects this subregister, forward the original.
3699       DstRegs.push_back(SrcRegs[i]);
3700       continue;
3701     } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
3702       // The entire subregister is defined by this insert, forward the new
3703       // value.
3704       DstRegs.push_back(OpReg);
3705       continue;
3706     }
3707 
    // Figure out how the inserted value overlaps this destination piece: the
    // offset into OpReg to extract from, the offset within the piece to insert
    // at, and the size of the overlapping segment.
3710     int64_t ExtractOffset, InsertOffset;
3711     uint64_t SegSize;
3712     if (OpStart < DstStart) {
3713       InsertOffset = 0;
3714       ExtractOffset = DstStart - OpStart;
3715       SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
3716     } else {
3717       InsertOffset = OpStart - DstStart;
3718       ExtractOffset = 0;
3719       SegSize =
3720         std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
3721     }
3722 
3723     Register SegReg = OpReg;
3724     if (ExtractOffset != 0 || SegSize != OpSize) {
3725       // A genuine extract is needed.
3726       SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
3727       MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
3728     }
3729 
3730     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
3731     MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
3732     DstRegs.push_back(DstReg);
3733   }
3734 
3735   assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
3736   Register DstReg = MI.getOperand(0).getReg();
  if (MRI.getType(DstReg).isVector())
3738     MIRBuilder.buildBuildVector(DstReg, DstRegs);
3739   else
3740     MIRBuilder.buildMerge(DstReg, DstRegs);
3741   MI.eraseFromParent();
3742   return Legalized;
3743 }
3744 
3745 LegalizerHelper::LegalizeResult
3746 LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
3747                                    LLT NarrowTy) {
3748   Register DstReg = MI.getOperand(0).getReg();
3749   LLT DstTy = MRI.getType(DstReg);
3750 
3751   assert(MI.getNumOperands() == 3 && TypeIdx == 0);
3752 
3753   SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
3754   SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
3755   SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
3756   LLT LeftoverTy;
3757   if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
3758                     Src0Regs, Src0LeftoverRegs))
3759     return UnableToLegalize;
3760 
3761   LLT Unused;
3762   if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
3763                     Src1Regs, Src1LeftoverRegs))
3764     llvm_unreachable("inconsistent extractParts result");
3765 
3766   for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
3767     auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
3768                                         {Src0Regs[I], Src1Regs[I]});
3769     DstRegs.push_back(Inst.getReg(0));
3770   }
3771 
3772   for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
3773     auto Inst = MIRBuilder.buildInstr(
3774       MI.getOpcode(),
3775       {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
3776     DstLeftoverRegs.push_back(Inst.getReg(0));
3777   }
3778 
3779   insertParts(DstReg, DstTy, NarrowTy, DstRegs,
3780               LeftoverTy, DstLeftoverRegs);
3781 
3782   MI.eraseFromParent();
3783   return Legalized;
3784 }
3785 
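// Narrow a scalar extension: break the source into pieces of the common (GCD)
// type with extractGCDType, then rebuild the wide destination from those
// pieces with buildLCMMerge, forwarding the original extension opcode.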
3786 LegalizerHelper::LegalizeResult
3787 LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
3788                                  LLT NarrowTy) {
3789   if (TypeIdx != 0)
3790     return UnableToLegalize;
3791 
3792   Register DstReg = MI.getOperand(0).getReg();
3793   Register SrcReg = MI.getOperand(1).getReg();
3794 
3795   LLT DstTy = MRI.getType(DstReg);
3796   if (DstTy.isVector())
3797     return UnableToLegalize;
3798 
3799   SmallVector<Register, 8> Parts;
3800   LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
3801   buildLCMMerge(DstReg, NarrowTy, GCDTy, Parts, MI.getOpcode());
3802   MI.eraseFromParent();
3803   return Legalized;
3804 }
3805 
3806 LegalizerHelper::LegalizeResult
3807 LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
3808                                     LLT NarrowTy) {
3809   if (TypeIdx != 0)
3810     return UnableToLegalize;
3811 
3812   Register CondReg = MI.getOperand(1).getReg();
3813   LLT CondTy = MRI.getType(CondReg);
3814   if (CondTy.isVector()) // TODO: Handle vselect
3815     return UnableToLegalize;
3816 
3817   Register DstReg = MI.getOperand(0).getReg();
3818   LLT DstTy = MRI.getType(DstReg);
3819 
3820   SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
3821   SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
3822   SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
3823   LLT LeftoverTy;
3824   if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
3825                     Src1Regs, Src1LeftoverRegs))
3826     return UnableToLegalize;
3827 
3828   LLT Unused;
3829   if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
3830                     Src2Regs, Src2LeftoverRegs))
3831     llvm_unreachable("inconsistent extractParts result");
3832 
3833   for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
3834     auto Select = MIRBuilder.buildSelect(NarrowTy,
3835                                          CondReg, Src1Regs[I], Src2Regs[I]);
3836     DstRegs.push_back(Select.getReg(0));
3837   }
3838 
3839   for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
3840     auto Select = MIRBuilder.buildSelect(
3841       LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
3842     DstLeftoverRegs.push_back(Select.getReg(0));
3843   }
3844 
3845   insertParts(DstReg, DstTy, NarrowTy, DstRegs,
3846               LeftoverTy, DstLeftoverRegs);
3847 
3848   MI.eraseFromParent();
3849   return Legalized;
3850 }
3851 
3852 LegalizerHelper::LegalizeResult
3853 LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
3854   unsigned Opc = MI.getOpcode();
3855   auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
3856   auto isSupported = [this](const LegalityQuery &Q) {
3857     auto QAction = LI.getAction(Q).Action;
3858     return QAction == Legal || QAction == Libcall || QAction == Custom;
3859   };
3860   switch (Opc) {
3861   default:
3862     return UnableToLegalize;
3863   case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
3864     // This trivially expands to CTLZ.
3865     Observer.changingInstr(MI);
3866     MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
3867     Observer.changedInstr(MI);
3868     return Legalized;
3869   }
3870   case TargetOpcode::G_CTLZ: {
3871     Register SrcReg = MI.getOperand(1).getReg();
3872     unsigned Len = Ty.getSizeInBits();
3873     if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) {
3874       // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
3875       auto MIBCtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(Ty, SrcReg);
3876       auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
3877       auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
3878       auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
3879                                           SrcReg, MIBZero);
3880       MIRBuilder.buildSelect(MI.getOperand(0), MIBICmp, MIBLen, MIBCtlzZU);
3881       MI.eraseFromParent();
3882       return Legalized;
3883     }
    // For now, we do this:
    // NewLen = NextPowerOf2(Len);
    // x = x | (x >> 1);
    // x = x | (x >> 2);
    // ...
    // x = x | (x >> 16);
    // x = x | (x >> 32); // for 64-bit input
    // (continuing up to a shift of NewLen/2)
    // return Len - popcount(x);
3893     //
3894     // Ref: "Hacker's Delight" by Henry Warren
3895     Register Op = SrcReg;
3896     unsigned NewLen = PowerOf2Ceil(Len);
3897     for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
3898       auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
3899       auto MIBOp =
3900           MIRBuilder.buildOr(Ty, Op, MIRBuilder.buildLShr(Ty, Op, MIBShiftAmt));
3901       Op = MIBOp.getReg(0);
3902     }
3903     auto MIBPop = MIRBuilder.buildCTPOP(Ty, Op);
3904     MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(Ty, Len),
3905                         MIBPop);
3906     MI.eraseFromParent();
3907     return Legalized;
3908   }
3909   case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
3910     // This trivially expands to CTTZ.
3911     Observer.changingInstr(MI);
3912     MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
3913     Observer.changedInstr(MI);
3914     return Legalized;
3915   }
3916   case TargetOpcode::G_CTTZ: {
3917     Register SrcReg = MI.getOperand(1).getReg();
3918     unsigned Len = Ty.getSizeInBits();
3919     if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) {
3920       // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
3921       // zero.
3922       auto MIBCttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(Ty, SrcReg);
3923       auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
3924       auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
3925       auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
3926                                           SrcReg, MIBZero);
3927       MIRBuilder.buildSelect(MI.getOperand(0), MIBICmp, MIBLen, MIBCttzZU);
3928       MI.eraseFromParent();
3929       return Legalized;
3930     }
    // For now, we use: { return popcount(~x & (x - 1)); }
    // unless the target has ctlz but not ctpop, in which case we use:
    // { return Len - ctlz(~x & (x - 1)); }
3934     // Ref: "Hacker's Delight" by Henry Warren
3935     auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
3936     auto MIBNot = MIRBuilder.buildXor(Ty, SrcReg, MIBCstNeg1);
3937     auto MIBTmp = MIRBuilder.buildAnd(
3938         Ty, MIBNot, MIRBuilder.buildAdd(Ty, SrcReg, MIBCstNeg1));
3939     if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) &&
3940         isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) {
3941       auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
3942       MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
3943                           MIRBuilder.buildCTLZ(Ty, MIBTmp));
3944       MI.eraseFromParent();
3945       return Legalized;
3946     }
3947     MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
3948     MI.getOperand(1).setReg(MIBTmp.getReg(0));
3949     return Legalized;
3950   }
3951   }
3952 }
3953 
3954 // Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
3955 // representation.
3956 LegalizerHelper::LegalizeResult
3957 LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
3958   Register Dst = MI.getOperand(0).getReg();
3959   Register Src = MI.getOperand(1).getReg();
3960   const LLT S64 = LLT::scalar(64);
3961   const LLT S32 = LLT::scalar(32);
3962   const LLT S1 = LLT::scalar(1);
3963 
3964   assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
3965 
  // float cul2f(ulong u) {
3967   //   uint lz = clz(u);
3968   //   uint e = (u != 0) ? 127U + 63U - lz : 0;
3969   //   u = (u << lz) & 0x7fffffffffffffffUL;
3970   //   ulong t = u & 0xffffffffffUL;
3971   //   uint v = (e << 23) | (uint)(u >> 40);
3972   //   uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
3973   //   return as_float(v + r);
3974   // }
3975 
3976   auto Zero32 = MIRBuilder.buildConstant(S32, 0);
3977   auto Zero64 = MIRBuilder.buildConstant(S64, 0);
3978 
3979   auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
3980 
3981   auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
3982   auto Sub = MIRBuilder.buildSub(S32, K, LZ);
3983 
3984   auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
3985   auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
3986 
3987   auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
3988   auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
3989 
3990   auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
3991 
3992   auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
3993   auto T = MIRBuilder.buildAnd(S64, U, Mask1);
3994 
3995   auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
3996   auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
3997   auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
3998 
3999   auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
4000   auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
4001   auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
4002   auto One = MIRBuilder.buildConstant(S32, 1);
4003 
4004   auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
4005   auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
4006   auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
4007   MIRBuilder.buildAdd(Dst, V, R);
4008 
4009   return Legalized;
4010 }
4011 
4012 LegalizerHelper::LegalizeResult
4013 LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
4014   Register Dst = MI.getOperand(0).getReg();
4015   Register Src = MI.getOperand(1).getReg();
4016   LLT DstTy = MRI.getType(Dst);
4017   LLT SrcTy = MRI.getType(Src);
4018 
4019   if (SrcTy == LLT::scalar(1)) {
4020     auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
4021     auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
4022     MIRBuilder.buildSelect(Dst, Src, True, False);
4023     MI.eraseFromParent();
4024     return Legalized;
4025   }
4026 
4027   if (SrcTy != LLT::scalar(64))
4028     return UnableToLegalize;
4029 
4030   if (DstTy == LLT::scalar(32)) {
    // TODO: SelectionDAG has several alternative expansions to port which may
    // be more reasonable depending on the available instructions. If a target
    // has sitofp, does not have CTLZ, or can efficiently use f64 as an
    // intermediate type, this is probably worse.
4035     return lowerU64ToF32BitOps(MI);
4036   }
4037 
4038   return UnableToLegalize;
4039 }
4040 
4041 LegalizerHelper::LegalizeResult
4042 LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
4043   Register Dst = MI.getOperand(0).getReg();
4044   Register Src = MI.getOperand(1).getReg();
4045   LLT DstTy = MRI.getType(Dst);
4046   LLT SrcTy = MRI.getType(Src);
4047 
4048   const LLT S64 = LLT::scalar(64);
4049   const LLT S32 = LLT::scalar(32);
4050   const LLT S1 = LLT::scalar(1);
4051 
4052   if (SrcTy == S1) {
4053     auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
4054     auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
4055     MIRBuilder.buildSelect(Dst, Src, True, False);
4056     MI.eraseFromParent();
4057     return Legalized;
4058   }
4059 
4060   if (SrcTy != S64)
4061     return UnableToLegalize;
4062 
4063   if (DstTy == S32) {
    // float cl2f(long l) {
4065     //   long s = l >> 63;
4066     //   float r = cul2f((l + s) ^ s);
4067     //   return s ? -r : r;
4068     // }
4069     Register L = Src;
4070     auto SignBit = MIRBuilder.buildConstant(S64, 63);
4071     auto S = MIRBuilder.buildAShr(S64, L, SignBit);
4072 
4073     auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
4074     auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
4075     auto R = MIRBuilder.buildUITOFP(S32, Xor);
4076 
4077     auto RNeg = MIRBuilder.buildFNeg(S32, R);
4078     auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
4079                                             MIRBuilder.buildConstant(S64, 0));
4080     MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
4081     return Legalized;
4082   }
4083 
4084   return UnableToLegalize;
4085 }
4086 
4087 LegalizerHelper::LegalizeResult
4088 LegalizerHelper::lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
4089   Register Dst = MI.getOperand(0).getReg();
4090   Register Src = MI.getOperand(1).getReg();
4091   LLT DstTy = MRI.getType(Dst);
4092   LLT SrcTy = MRI.getType(Src);
4093   const LLT S64 = LLT::scalar(64);
4094   const LLT S32 = LLT::scalar(32);
4095 
4096   if (SrcTy != S64 && SrcTy != S32)
4097     return UnableToLegalize;
4098   if (DstTy != S32 && DstTy != S64)
4099     return UnableToLegalize;
4100 
  // FPTOSI gives the same result as FPTOUI for positive signed integers.
  // FPTOUI additionally needs to handle fp values that convert to unsigned
  // integers greater than or equal to 2^31 for float or 2^63 for double. Call
  // this threshold 2^Exp for brevity.
4104 
4105   APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
4106   APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
4107                                                 : APFloat::IEEEdouble(),
4108                     APInt::getNullValue(SrcTy.getSizeInBits()));
4109   TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
4110 
4111   MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
4112 
4113   MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
  // For an fp value greater than or equal to Threshold (2^Exp), use FPTOSI on
  // (Value - 2^Exp) and add 2^Exp back by setting the highest bit of the
  // result to 1.
4116   MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
4117   MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
4118   MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
4119   MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
4120 
4121   const LLT S1 = LLT::scalar(1);
4122 
4123   MachineInstrBuilder FCMP =
4124       MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
4125   MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
4126 
4127   MI.eraseFromParent();
4128   return Legalized;
4129 }
4130 
4131 static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
4132   switch (Opc) {
4133   case TargetOpcode::G_SMIN:
4134     return CmpInst::ICMP_SLT;
4135   case TargetOpcode::G_SMAX:
4136     return CmpInst::ICMP_SGT;
4137   case TargetOpcode::G_UMIN:
4138     return CmpInst::ICMP_ULT;
4139   case TargetOpcode::G_UMAX:
4140     return CmpInst::ICMP_UGT;
4141   default:
4142     llvm_unreachable("not in integer min/max");
4143   }
4144 }
4145 
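// Lower integer min/max to a compare and select, e.g.
//   %d:_(s32) = G_SMAX %a, %b
//     ->
//   %c:_(s1) = G_ICMP intpred(sgt), %a, %b
//   %d:_(s32) = G_SELECT %c, %a, %b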
4146 LegalizerHelper::LegalizeResult
4147 LegalizerHelper::lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
4148   Register Dst = MI.getOperand(0).getReg();
4149   Register Src0 = MI.getOperand(1).getReg();
4150   Register Src1 = MI.getOperand(2).getReg();
4151 
4152   const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
4153   LLT CmpType = MRI.getType(Dst).changeElementSize(1);
4154 
4155   auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
4156   MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
4157 
4158   MI.eraseFromParent();
4159   return Legalized;
4160 }
4161 
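// Lower G_FCOPYSIGN to integer bit operations: clear the sign bit of Src0,
// isolate the sign bit of Src1 (shifting it into place first if the operands
// have different scalar sizes), and or the two together.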
4162 LegalizerHelper::LegalizeResult
4163 LegalizerHelper::lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
4164   Register Dst = MI.getOperand(0).getReg();
4165   Register Src0 = MI.getOperand(1).getReg();
4166   Register Src1 = MI.getOperand(2).getReg();
4167 
4168   const LLT Src0Ty = MRI.getType(Src0);
4169   const LLT Src1Ty = MRI.getType(Src1);
4170 
4171   const int Src0Size = Src0Ty.getScalarSizeInBits();
4172   const int Src1Size = Src1Ty.getScalarSizeInBits();
4173 
4174   auto SignBitMask = MIRBuilder.buildConstant(
4175     Src0Ty, APInt::getSignMask(Src0Size));
4176 
4177   auto NotSignBitMask = MIRBuilder.buildConstant(
4178     Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
4179 
4180   auto And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask);
4181   MachineInstr *Or;
4182 
4183   if (Src0Ty == Src1Ty) {
4184     auto And1 = MIRBuilder.buildAnd(Src1Ty, Src0, SignBitMask);
4185     Or = MIRBuilder.buildOr(Dst, And0, And1);
4186   } else if (Src0Size > Src1Size) {
4187     auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
4188     auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
4189     auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
4190     auto And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask);
4191     Or = MIRBuilder.buildOr(Dst, And0, And1);
4192   } else {
4193     auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
4194     auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
4195     auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
4196     auto And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask);
4197     Or = MIRBuilder.buildOr(Dst, And0, And1);
4198   }
4199 
4200   // Be careful about setting nsz/nnan/ninf on every instruction, since the
4201   // constants are a nan and -0.0, but the final result should preserve
4202   // everything.
4203   if (unsigned Flags = MI.getFlags())
4204     Or->setFlags(Flags);
4205 
4206   MI.eraseFromParent();
4207   return Legalized;
4208 }
4209 
4210 LegalizerHelper::LegalizeResult
4211 LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
4212   unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
4213     TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
4214 
4215   Register Dst = MI.getOperand(0).getReg();
4216   Register Src0 = MI.getOperand(1).getReg();
4217   Register Src1 = MI.getOperand(2).getReg();
4218   LLT Ty = MRI.getType(Dst);
4219 
4220   if (!MI.getFlag(MachineInstr::FmNoNans)) {
    // Insert canonicalizations if it's possible we need to quiet to get
    // correct sNaN behavior.

    // Note this must be done here, and not as an optimization combine, in the
    // absence of a dedicated quiet-snan instruction, since we're using a
    // general-purpose G_FCANONICALIZE.
4227     if (!isKnownNeverSNaN(Src0, MRI))
4228       Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
4229 
4230     if (!isKnownNeverSNaN(Src1, MRI))
4231       Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
4232   }
4233 
4234   // If there are no nans, it's safe to simply replace this with the non-IEEE
4235   // version.
4236   MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
4237   MI.eraseFromParent();
4238   return Legalized;
4239 }
4240 
4241 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
4242   // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
4243   Register DstReg = MI.getOperand(0).getReg();
4244   LLT Ty = MRI.getType(DstReg);
4245   unsigned Flags = MI.getFlags();
4246 
4247   auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
4248                                   Flags);
4249   MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
4250   MI.eraseFromParent();
4251   return Legalized;
4252 }
4253 
4254 LegalizerHelper::LegalizeResult
4255 LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
4256   Register DstReg = MI.getOperand(0).getReg();
4257   Register SrcReg = MI.getOperand(1).getReg();
4258   unsigned Flags = MI.getFlags();
4259   LLT Ty = MRI.getType(DstReg);
4260   const LLT CondTy = Ty.changeElementSize(1);
4261 
4262   // result = trunc(src);
4263   // if (src < 0.0 && src != result)
4264   //   result += -1.0.
4265 
4266   auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
4267   auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
4268 
4269   auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
4270                                   SrcReg, Zero, Flags);
4271   auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
4272                                       SrcReg, Trunc, Flags);
4273   auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
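  // G_SITOFP of the i1 condition yields -1.0 when the condition is true and
  // 0.0 otherwise, so the add below applies the conditional -1.0 adjustment.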
  auto AddVal = MIRBuilder.buildSITOFP(Ty, And);

  MIRBuilder.buildFAdd(DstReg, Trunc, AddVal);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
  const unsigned NumDst = MI.getNumOperands() - 1;
  const Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);

  // Expand scalarizing unmerge as bitcast to integer and shift.
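  // e.g. an unmerge of <4 x s8> into four s8 pieces becomes a bitcast to s32
  // followed by a right shift and truncate for each piece after the first.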
  if (!DstTy.isVector() && SrcTy.isVector() &&
      SrcTy.getElementType() == DstTy) {
    LLT IntTy = LLT::scalar(SrcTy.getSizeInBits());
    Register Cast = MIRBuilder.buildBitcast(IntTy, SrcReg).getReg(0);

    MIRBuilder.buildTrunc(Dst0Reg, Cast);

    const unsigned DstSize = DstTy.getSizeInBits();
    unsigned Offset = DstSize;
    for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
      auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
      auto Shift = MIRBuilder.buildLShr(IntTy, Cast, ShiftAmt);
      MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(1).getReg();
  Register Src1Reg = MI.getOperand(2).getReg();
  LLT Src0Ty = MRI.getType(Src0Reg);
  LLT DstTy = MRI.getType(DstReg);
  LLT IdxTy = LLT::scalar(32);

  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();

  if (DstTy.isScalar()) {
    if (Src0Ty.isVector())
      return UnableToLegalize;

    // This is just a SELECT.
    assert(Mask.size() == 1 && "Expected a single mask element");
    Register Val;
    if (Mask[0] < 0 || Mask[0] > 1)
      Val = MIRBuilder.buildUndef(DstTy).getReg(0);
    else
      Val = Mask[0] == 0 ? Src0Reg : Src1Reg;
    MIRBuilder.buildCopy(DstReg, Val);
    MI.eraseFromParent();
    return Legalized;
  }

  Register Undef;
  SmallVector<Register, 32> BuildVec;
  LLT EltTy = DstTy.getElementType();

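  // Lower to an element-by-element extract from the appropriate source vector
  // (or an undef element for a negative mask index), then rebuild the result
  // with G_BUILD_VECTOR.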
  for (int Idx : Mask) {
    if (Idx < 0) {
      if (!Undef.isValid())
        Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
      BuildVec.push_back(Undef);
      continue;
    }

    if (Src0Ty.isScalar()) {
      BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
    } else {
      int NumElts = Src0Ty.getNumElements();
      Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
      int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
      auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
      auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
      BuildVec.push_back(Extract.getReg(0));
    }
  }

  MIRBuilder.buildBuildVector(DstReg, BuildVec);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register AllocSize = MI.getOperand(1).getReg();
  unsigned Align = MI.getOperand(2).getImm();

  const auto &MF = *MI.getMF();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();

  LLT PtrTy = MRI.getType(Dst);
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
  auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
  SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);

  // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
  // have to generate an extra instruction to negate the alloc and then use
  // G_PTR_ADD to add the negative offset.
  auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
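  // If an alignment was requested, round the new SP down to the (power-of-two)
  // alignment by ANDing with -Align, which clears the low bits.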
  if (Align) {
    APInt AlignMask(IntPtrTy.getSizeInBits(), Align, true);
    AlignMask.negate();
    auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
    Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
  }

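  // Write the adjusted value back to the stack pointer and also return it as
  // the address of the allocation.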
  SPTmp = MIRBuilder.buildCast(PtrTy, Alloc);
  MIRBuilder.buildCopy(SPReg, SPTmp);
  MIRBuilder.buildCopy(Dst, SPTmp);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerExtract(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  unsigned Offset = MI.getOperand(2).getImm();

  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

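  // Extract a scalar result by bitcasting the source to an integer if
  // necessary, shifting the desired bits down to bit 0, and truncating.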
  if (DstTy.isScalar() &&
      (SrcTy.isScalar() ||
       (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
    LLT SrcIntTy = SrcTy;
    if (!SrcTy.isScalar()) {
      SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
      Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0);
    }

    if (Offset == 0)
      MIRBuilder.buildTrunc(Dst, Src);
    else {
      auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
      auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt);
      MIRBuilder.buildTrunc(Dst, Shr);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register InsertSrc = MI.getOperand(2).getReg();
  uint64_t Offset = MI.getOperand(3).getImm();

  LLT DstTy = MRI.getType(Src);
  LLT InsertTy = MRI.getType(InsertSrc);

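  // Insert the scalar by zero-extending it to the full width, shifting it up
  // to the bit offset, clearing those bits in the original value with a mask,
  // and ORing the two together.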
  if (InsertTy.isScalar() &&
      (DstTy.isScalar() ||
       (DstTy.isVector() && DstTy.getElementType() == InsertTy))) {
    LLT IntDstTy = DstTy;
    if (!DstTy.isScalar()) {
      IntDstTy = LLT::scalar(DstTy.getSizeInBits());
      Src = MIRBuilder.buildBitcast(IntDstTy, Src).getReg(0);
    }

    Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
    if (Offset != 0) {
      auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
      ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
    }

    APInt MaskVal = ~APInt::getBitsSet(DstTy.getSizeInBits(), Offset,
                                       Offset + InsertTy.getSizeInBits());

    auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
    auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
    auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);

    MIRBuilder.buildBitcast(Dst, Or);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
  Register Dst0 = MI.getOperand(0).getReg();
  Register Dst1 = MI.getOperand(1).getReg();
  Register LHS = MI.getOperand(2).getReg();
  Register RHS = MI.getOperand(3).getReg();
  const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;

  LLT Ty = MRI.getType(Dst0);
  LLT BoolTy = MRI.getType(Dst1);

  if (IsAdd)
    MIRBuilder.buildAdd(Dst0, LHS, RHS);
  else
    MIRBuilder.buildSub(Dst0, LHS, RHS);

  // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.

  auto Zero = MIRBuilder.buildConstant(Ty, 0);

  // For an addition, the result should be less than one of the operands (LHS)
  // if and only if the other operand (RHS) is negative, otherwise there will
  // be overflow.
  // For a subtraction, the result should be less than one of the operands
  // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
  // otherwise there will be overflow.
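  // e.g. for an s8 add, 100 + 100 wraps to -56: the result (-56) is less than
  // LHS (100) while RHS (100) is not negative, so the two compares disagree
  // and the XOR below reports overflow.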
  auto ResultLowerThanLHS =
      MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
  auto ConditionRHS = MIRBuilder.buildICmp(
      IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);

  MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBswap(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  const LLT Ty = MRI.getType(Src);
  unsigned SizeInBytes = Ty.getSizeInBytes();
  unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;

  // Swap most and least significant byte, set remaining bytes in Res to zero.
  auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
  auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
  auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
  auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);

  // Set i-th high/low byte in Res to i-th low/high byte from Src.
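  // e.g. for s32 a single iteration (i = 1) exchanges bytes 1 and 2; for s64
  // the iterations exchange bytes 1/6, 2/5, and 3/4.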
  for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
    // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
    APInt APMask(SizeInBytes * 8, 0xFFULL << (i * 8));
    auto Mask = MIRBuilder.buildConstant(Ty, APMask);
    auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
    // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
    auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
    auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
    Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
    // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
    auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
    auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
    Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
  }
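  // Redirect the def of the final OR to the original destination register.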
  Res.getInstr()->getOperand(0).setReg(Dst);

  MI.eraseFromParent();
  return Legalized;
}

//{ (Src & Mask) >> N } | { (Src << N) & Mask }
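// Helper for lowerBitreverse: exchanges adjacent N-bit groups within Src.
// Mask must select the high N bits of each 2N-bit group.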
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
                                 MachineInstrBuilder Src, APInt Mask) {
  const LLT Ty = Dst.getLLTTy(*B.getMRI());
  MachineInstrBuilder C_N = B.buildConstant(Ty, N);
  MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
  auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
  auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
  return B.buildOr(Dst, LHS, RHS);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  const LLT Ty = MRI.getType(Src);
  unsigned Size = Ty.getSizeInBits();

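  // Reverse the byte order first, then reverse the bits within each byte by
  // swapping nibbles, bit pairs, and finally adjacent bits.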
  MachineInstrBuilder BSWAP =
      MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});

  // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
  //    [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
  // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
  MachineInstrBuilder Swap4 =
      SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));

  // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
  //    [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
  // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
  MachineInstrBuilder Swap2 =
      SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));

  // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
  //    [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
  // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
  SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerReadRegister(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  const LLT Ty = MRI.getType(Dst);
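  // The register to read is named by an MDString wrapped in the metadata
  // operand; resolve it through the target's getRegisterByName hook.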
  const MDString *RegStr = cast<MDString>(
    cast<MDNode>(MI.getOperand(1).getMetadata())->getOperand(0));

  MachineFunction &MF = MIRBuilder.getMF();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetLowering *TLI = STI.getTargetLowering();
  Register Reg = TLI->getRegisterByName(RegStr->getString().data(), Ty, MF);
  if (!Reg.isValid())
    return UnableToLegalize;

  MIRBuilder.buildCopy(Dst, Reg);
  MI.eraseFromParent();
  return Legalized;
}