1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file implements the LegalizerHelper class to legalize
10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
11 /// primary legalization.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
16 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
17 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
18 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
19 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
20 #include "llvm/CodeGen/GlobalISel/Utils.h"
21 #include "llvm/CodeGen/MachineRegisterInfo.h"
22 #include "llvm/CodeGen/TargetFrameLowering.h"
23 #include "llvm/CodeGen/TargetInstrInfo.h"
24 #include "llvm/CodeGen/TargetLowering.h"
25 #include "llvm/CodeGen/TargetSubtargetInfo.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Support/MathExtras.h"
28 #include "llvm/Support/raw_ostream.h"
29 
30 #define DEBUG_TYPE "legalizer"
31 
32 using namespace llvm;
33 using namespace LegalizeActions;
34 using namespace MIPatternMatch;
35 
36 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
37 ///
38 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
39 /// with any leftover piece as type \p LeftoverTy
40 ///
41 /// Returns -1 in the first element of the pair if the breakdown is not
42 /// satisfiable.
43 static std::pair<int, int>
44 getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
45   assert(!LeftoverTy.isValid() && "this is an out argument");
46 
47   unsigned Size = OrigTy.getSizeInBits();
48   unsigned NarrowSize = NarrowTy.getSizeInBits();
49   unsigned NumParts = Size / NarrowSize;
50   unsigned LeftoverSize = Size - NumParts * NarrowSize;
51   assert(Size > NarrowSize);
52 
53   if (LeftoverSize == 0)
54     return {NumParts, 0};
55 
56   if (NarrowTy.isVector()) {
57     unsigned EltSize = OrigTy.getScalarSizeInBits();
58     if (LeftoverSize % EltSize != 0)
59       return {-1, -1};
60     LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
61   } else {
62     LeftoverTy = LLT::scalar(LeftoverSize);
63   }
64 
65   int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
66   return std::make_pair(NumParts, NumLeftover);
67 }
68 
69 static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
70 
71   if (!Ty.isScalar())
72     return nullptr;
73 
74   switch (Ty.getSizeInBits()) {
75   case 16:
76     return Type::getHalfTy(Ctx);
77   case 32:
78     return Type::getFloatTy(Ctx);
79   case 64:
80     return Type::getDoubleTy(Ctx);
81   case 80:
82     return Type::getX86_FP80Ty(Ctx);
83   case 128:
84     return Type::getFP128Ty(Ctx);
85   default:
86     return nullptr;
87   }
88 }
89 
// Construct a helper for \p MF, querying the function's subtarget for both
// the LegalizerInfo and the TargetLowering.
LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &Builder)
    : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()),
      TLI(*MF.getSubtarget().getTargetLowering()) { }
96 
// Variant that lets the caller supply a specific LegalizerInfo instead of
// taking the one registered with the subtarget.
LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &B)
  : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
    TLI(*MF.getSubtarget().getTargetLowering()) { }
102 
// Perform one legalization step on \p MI: query the target's rules for the
// next action to apply and dispatch to the matching strategy. Returns
// AlreadyLegal, Legalized, or UnableToLegalize.
LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
  LLVM_DEBUG(dbgs() << "Legalizing: " << MI);

  MIRBuilder.setInstrAndDebugLoc(MI);

  // Intrinsics are not described by the action table; they are legalized
  // entirely by the target's legalizeIntrinsic hook.
  if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
      MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
    return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case Legal:
    LLVM_DEBUG(dbgs() << ".. Already legal\n");
    return AlreadyLegal;
  case Libcall:
    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
    return libcall(MI);
  case NarrowScalar:
    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
    return narrowScalar(MI, Step.TypeIdx, Step.NewType);
  case WidenScalar:
    LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
    return widenScalar(MI, Step.TypeIdx, Step.NewType);
  case Bitcast:
    LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
    return bitcast(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    LLVM_DEBUG(dbgs() << ".. Lower\n");
    return lower(MI, Step.TypeIdx, Step.NewType);
  case FewerElements:
    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
  case MoreElements:
    LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
    return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
  case Custom:
    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
    return LI.legalizeCustom(*this, MI) ? Legalized : UnableToLegalize;
  default:
    // Covers actions this helper cannot implement (e.g. NotFound).
    LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
    return UnableToLegalize;
  }
}
146 
147 void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
148                                    SmallVectorImpl<Register> &VRegs) {
149   for (int i = 0; i < NumParts; ++i)
150     VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
151   MIRBuilder.buildUnmerge(VRegs, Reg);
152 }
153 
// Split \p Reg (of type \p RegTy) into as many \p MainTy pieces as fit,
// plus leftover pieces covering the remainder. The leftover piece type is
// reported through the \p LeftoverTy out argument. Returns false if the
// remainder cannot be described by a single leftover type.
bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
                                   LLT MainTy, LLT &LeftoverTy,
                                   SmallVectorImpl<Register> &VRegs,
                                   SmallVectorImpl<Register> &LeftoverRegs) {
  assert(!LeftoverTy.isValid() && "this is an out argument");

  unsigned RegSize = RegTy.getSizeInBits();
  unsigned MainSize = MainTy.getSizeInBits();
  unsigned NumParts = RegSize / MainSize;
  unsigned LeftoverSize = RegSize - NumParts * MainSize;

  // Use an unmerge when possible.
  if (LeftoverSize == 0) {
    for (unsigned I = 0; I < NumParts; ++I)
      VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
    MIRBuilder.buildUnmerge(VRegs, Reg);
    return true;
  }

  if (MainTy.isVector()) {
    unsigned EltSize = MainTy.getScalarSizeInBits();
    // The leftover piece must be a whole number of vector elements.
    if (LeftoverSize % EltSize != 0)
      return false;
    LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
  } else {
    LeftoverTy = LLT::scalar(LeftoverSize);
  }

  // For irregular sizes, extract the individual parts.
  for (unsigned I = 0; I != NumParts; ++I) {
    Register NewReg = MRI.createGenericVirtualRegister(MainTy);
    VRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
  }

  // Extract the remaining bits as one or more leftover-typed pieces,
  // starting where the main parts end.
  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
       Offset += LeftoverSize) {
    Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
    LeftoverRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, Offset);
  }

  return true;
}
198 
// Reassemble \p DstReg of type \p ResultTy from the main pieces \p PartRegs
// (each of type \p PartTy) plus optional leftover pieces \p LeftoverRegs
// (each of type \p LeftoverTy). Inverse of the leftover-aware extractParts
// above.
void LegalizerHelper::insertParts(Register DstReg,
                                  LLT ResultTy, LLT PartTy,
                                  ArrayRef<Register> PartRegs,
                                  LLT LeftoverTy,
                                  ArrayRef<Register> LeftoverRegs) {
  if (!LeftoverTy.isValid()) {
    assert(LeftoverRegs.empty());

    // With no leftover, a single merge / concat / build_vector suffices.
    if (!ResultTy.isVector()) {
      MIRBuilder.buildMerge(DstReg, PartRegs);
      return;
    }

    if (PartTy.isVector())
      MIRBuilder.buildConcatVectors(DstReg, PartRegs);
    else
      MIRBuilder.buildBuildVector(DstReg, PartRegs);
    return;
  }

  unsigned PartSize = PartTy.getSizeInBits();
  unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();

  // Otherwise chain G_INSERTs into an undef value, one per piece, tracking
  // the running bit offset.
  Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
  MIRBuilder.buildUndef(CurResultReg);

  unsigned Offset = 0;
  for (Register PartReg : PartRegs) {
    Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
    MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
    CurResultReg = NewResultReg;
    Offset += PartSize;
  }

  for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
    // Use the original output register for the final insert to avoid a copy.
    Register NewResultReg = (I + 1 == E) ?
      DstReg : MRI.createGenericVirtualRegister(ResultTy);

    MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
    CurResultReg = NewResultReg;
    Offset += LeftoverPartSize;
  }
}
243 
244 /// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
245 static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
246                               const MachineInstr &MI) {
247   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
248 
249   const int StartIdx = Regs.size();
250   const int NumResults = MI.getNumOperands() - 1;
251   Regs.resize(Regs.size() + NumResults);
252   for (int I = 0; I != NumResults; ++I)
253     Regs[StartIdx + I] = MI.getOperand(I).getReg();
254 }
255 
256 void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
257                                      LLT GCDTy, Register SrcReg) {
258   LLT SrcTy = MRI.getType(SrcReg);
259   if (SrcTy == GCDTy) {
260     // If the source already evenly divides the result type, we don't need to do
261     // anything.
262     Parts.push_back(SrcReg);
263   } else {
264     // Need to split into common type sized pieces.
265     auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
266     getUnmergeResults(Parts, *Unmerge);
267   }
268 }
269 
// Compute the greatest-common-divisor type of \p SrcReg's type, \p NarrowTy
// and \p DstTy, split \p SrcReg into pieces of that type (appended to
// \p Parts), and return the GCD type.
LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
                                    LLT NarrowTy, Register SrcReg) {
  LLT SrcTy = MRI.getType(SrcReg);
  LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
  extractGCDType(Parts, GCDTy, SrcReg);
  return GCDTy;
}
277 
// Pad and regroup the GCDTy-typed pieces in \p VRegs so that they cover the
// least common multiple type of \p DstTy and \p NarrowTy, then replace
// \p VRegs with NarrowTy-typed merges of those pieces. \p PadStrategy
// (G_ANYEXT, G_ZEXT or G_SEXT) selects how missing high pieces are
// synthesized. Returns the LCM type that the resulting registers cover.
LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
                                         SmallVectorImpl<Register> &VRegs,
                                         unsigned PadStrategy) {
  LLT LCMTy = getLCMType(DstTy, NarrowTy);

  // Number of NarrowTy pieces in the LCM type, and number of GCDTy pieces in
  // each NarrowTy piece.
  int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
  int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
  int NumOrigSrc = VRegs.size();

  Register PadReg;

  // Get a value we can use to pad the source value if the sources won't evenly
  // cover the result type.
  if (NumOrigSrc < NumParts * NumSubParts) {
    if (PadStrategy == TargetOpcode::G_ZEXT)
      PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
    else if (PadStrategy == TargetOpcode::G_ANYEXT)
      PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    else {
      assert(PadStrategy == TargetOpcode::G_SEXT);

      // Shift the sign bit of the low register through the high register.
      auto ShiftAmt =
        MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
      PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
    }
  }

  // Registers for the final merge to be produced.
  SmallVector<Register, 4> Remerge(NumParts);

  // Registers needed for intermediate merges, which will be merged into a
  // source for Remerge.
  SmallVector<Register, 4> SubMerge(NumSubParts);

  // Once we've fully read off the end of the original source bits, we can reuse
  // the same high bits for remaining padding elements.
  Register AllPadReg;

  // Build merges to the LCM type to cover the original result type.
  for (int I = 0; I != NumParts; ++I) {
    bool AllMergePartsArePadding = true;

    // Build the requested merges to the requested type.
    for (int J = 0; J != NumSubParts; ++J) {
      int Idx = I * NumSubParts + J;
      if (Idx >= NumOrigSrc) {
        // Ran past the original sources; fill with the pad value.
        SubMerge[J] = PadReg;
        continue;
      }

      SubMerge[J] = VRegs[Idx];

      // There are meaningful bits here we can't reuse later.
      AllMergePartsArePadding = false;
    }

    // If we've filled up a complete piece with padding bits, we can directly
    // emit the natural sized constant if applicable, rather than a merge of
    // smaller constants.
    if (AllMergePartsArePadding && !AllPadReg) {
      if (PadStrategy == TargetOpcode::G_ANYEXT)
        AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
      else if (PadStrategy == TargetOpcode::G_ZEXT)
        AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);

      // If this is a sign extension, we can't materialize a trivial constant
      // with the right type and have to produce a merge.
    }

    if (AllPadReg) {
      // Avoid creating additional instructions if we're just adding additional
      // copies of padding bits.
      Remerge[I] = AllPadReg;
      continue;
    }

    if (NumSubParts == 1)
      Remerge[I] = SubMerge[0];
    else
      Remerge[I] = MIRBuilder.buildMerge(NarrowTy, SubMerge).getReg(0);

    // In the sign extend padding case, re-use the first all-signbit merge.
    if (AllMergePartsArePadding && !AllPadReg)
      AllPadReg = Remerge[I];
  }

  VRegs = std::move(Remerge);
  return LCMTy;
}
368 
// Merge \p RemergeRegs into the wide type \p LCMTy, then narrow the result
// back down into \p DstReg. Companion to buildLCMMergePieces.
void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
                                               ArrayRef<Register> RemergeRegs) {
  LLT DstTy = MRI.getType(DstReg);

  // Create the merge to the widened source, and extract the relevant bits into
  // the result.

  // Exact match: the merge already produces the destination type.
  if (DstTy == LCMTy) {
    MIRBuilder.buildMerge(DstReg, RemergeRegs);
    return;
  }

  auto Remerge = MIRBuilder.buildMerge(LCMTy, RemergeRegs);
  if (DstTy.isScalar() && LCMTy.isScalar()) {
    // Scalar result: drop the extra high bits with a trunc.
    MIRBuilder.buildTrunc(DstReg, Remerge);
    return;
  }

  if (LCMTy.isVector()) {
    // Vector result: unmerge the wide value, writing the first piece straight
    // into DstReg. The remaining unmerge results are not used here.
    unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
    SmallVector<Register, 8> UnmergeDefs(NumDefs);
    UnmergeDefs[0] = DstReg;
    for (unsigned I = 1; I != NumDefs; ++I)
      UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);

    MIRBuilder.buildUnmerge(UnmergeDefs,
                            MIRBuilder.buildMerge(LCMTy, RemergeRegs));
    return;
  }

  llvm_unreachable("unhandled case");
}
401 
/// Map a generic opcode and a scalar size in bits to the matching
/// runtime-library call enumerator. Asserts if \p Size has no libcall for the
/// opcode's family.
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
// Integer libcalls exist for 32/64/128-bit widths only.
#define RTLIBCASE_INT(LibcallPrefix)                                           \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

// Floating-point libcalls additionally support the 80-bit x87 width.
#define RTLIBCASE(LibcallPrefix)                                               \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 80:                                                                   \
      return RTLIB::LibcallPrefix##80;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

  switch (Opcode) {
  case TargetOpcode::G_SDIV:
    RTLIBCASE_INT(SDIV_I);
  case TargetOpcode::G_UDIV:
    RTLIBCASE_INT(UDIV_I);
  case TargetOpcode::G_SREM:
    RTLIBCASE_INT(SREM_I);
  case TargetOpcode::G_UREM:
    RTLIBCASE_INT(UREM_I);
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    RTLIBCASE_INT(CTLZ_I);
  case TargetOpcode::G_FADD:
    RTLIBCASE(ADD_F);
  case TargetOpcode::G_FSUB:
    RTLIBCASE(SUB_F);
  case TargetOpcode::G_FMUL:
    RTLIBCASE(MUL_F);
  case TargetOpcode::G_FDIV:
    RTLIBCASE(DIV_F);
  case TargetOpcode::G_FEXP:
    RTLIBCASE(EXP_F);
  case TargetOpcode::G_FEXP2:
    RTLIBCASE(EXP2_F);
  case TargetOpcode::G_FREM:
    RTLIBCASE(REM_F);
  case TargetOpcode::G_FPOW:
    RTLIBCASE(POW_F);
  case TargetOpcode::G_FMA:
    RTLIBCASE(FMA_F);
  case TargetOpcode::G_FSIN:
    RTLIBCASE(SIN_F);
  case TargetOpcode::G_FCOS:
    RTLIBCASE(COS_F);
  case TargetOpcode::G_FLOG10:
    RTLIBCASE(LOG10_F);
  case TargetOpcode::G_FLOG:
    RTLIBCASE(LOG_F);
  case TargetOpcode::G_FLOG2:
    RTLIBCASE(LOG2_F);
  case TargetOpcode::G_FCEIL:
    RTLIBCASE(CEIL_F);
  case TargetOpcode::G_FFLOOR:
    RTLIBCASE(FLOOR_F);
  case TargetOpcode::G_FMINNUM:
    RTLIBCASE(FMIN_F);
  case TargetOpcode::G_FMAXNUM:
    RTLIBCASE(FMAX_F);
  case TargetOpcode::G_FSQRT:
    RTLIBCASE(SQRT_F);
  case TargetOpcode::G_FRINT:
    RTLIBCASE(RINT_F);
  case TargetOpcode::G_FNEARBYINT:
    RTLIBCASE(NEARBYINT_F);
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
    RTLIBCASE(ROUNDEVEN_F);
  }
  llvm_unreachable("Unknown libcall function");
}
491 
492 /// True if an instruction is in tail position in its caller. Intended for
493 /// legalizing libcalls as tail calls when possible.
static bool isLibCallInTailPosition(const TargetInstrInfo &TII,
                                    MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const Function &F = MBB.getParent()->getFunction();

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore NoAlias and NonNull because they don't affect the
  // call sequence.
  AttributeList CallerAttrs = F.getAttributes();
  if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
          .removeAttribute(Attribute::NoAlias)
          .removeAttribute(Attribute::NonNull)
          .hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
      CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
    return false;

  // Only tail call if the following instruction is a standard return.
  // Skipping over debug instructions; reject if the block ends here, if the
  // next instruction is itself a tail call, or if it is not a return.
  auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
  if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
    return false;

  return true;
}
521 
522 LegalizerHelper::LegalizeResult
523 llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
524                     const CallLowering::ArgInfo &Result,
525                     ArrayRef<CallLowering::ArgInfo> Args,
526                     const CallingConv::ID CC) {
527   auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
528 
529   CallLowering::CallLoweringInfo Info;
530   Info.CallConv = CC;
531   Info.Callee = MachineOperand::CreateES(Name);
532   Info.OrigRet = Result;
533   std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
534   if (!CLI.lowerCall(MIRBuilder, Info))
535     return LegalizerHelper::UnableToLegalize;
536 
537   return LegalizerHelper::Legalized;
538 }
539 
540 LegalizerHelper::LegalizeResult
541 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
542                     const CallLowering::ArgInfo &Result,
543                     ArrayRef<CallLowering::ArgInfo> Args) {
544   auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
545   const char *Name = TLI.getLibcallName(Libcall);
546   const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
547   return createLibcall(MIRBuilder, Name, Result, Args, CC);
548 }
549 
550 // Useful for libcalls where all operands have the same type.
551 static LegalizerHelper::LegalizeResult
552 simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
553               Type *OpType) {
554   auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
555 
556   SmallVector<CallLowering::ArgInfo, 3> Args;
557   for (unsigned i = 1; i < MI.getNumOperands(); i++)
558     Args.push_back({MI.getOperand(i).getReg(), OpType});
559   return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
560                        Args);
561 }
562 
// Lower a G_BZERO / G_MEMCPY / G_MEMMOVE / G_MEMSET instruction into a call
// to the corresponding runtime-library function, possibly emitted as a tail
// call. The caller is responsible for erasing \p MI on success.
LegalizerHelper::LegalizeResult
llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                       MachineInstr &MI) {
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  SmallVector<CallLowering::ArgInfo, 3> Args;
  // Add all the args, except for the last which is an imm denoting 'tail'.
  for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
    Register Reg = MI.getOperand(i).getReg();

    // Need derive an IR type for call lowering.
    LLT OpLLT = MRI.getType(Reg);
    Type *OpTy = nullptr;
    if (OpLLT.isPointer())
      OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
    else
      OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
    Args.push_back({Reg, OpTy});
  }

  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  RTLIB::Libcall RTLibcall;
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_BZERO:
    RTLibcall = RTLIB::BZERO;
    break;
  case TargetOpcode::G_MEMCPY:
    RTLibcall = RTLIB::MEMCPY;
    break;
  case TargetOpcode::G_MEMMOVE:
    RTLibcall = RTLIB::MEMMOVE;
    break;
  case TargetOpcode::G_MEMSET:
    RTLibcall = RTLIB::MEMSET;
    break;
  default:
    return LegalizerHelper::UnableToLegalize;
  }
  const char *Name = TLI.getLibcallName(RTLibcall);

  // Unsupported libcall on the target.
  if (!Name) {
    LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
                      << MIRBuilder.getTII().getName(Opc) << "\n");
    return LegalizerHelper::UnableToLegalize;
  }

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
  Info.Callee = MachineOperand::CreateES(Name);
  // These routines return void; there is no result register.
  Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
  // Tail-call only if the trailing 'tail' immediate requests it and the
  // instruction is actually in tail position.
  Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
                    isLibCallInTailPosition(MIRBuilder.getTII(), MI);

  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  if (Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
    // We must have a return following the call (or debug insts) to get past
    // isLibCallInTailPosition.
    do {
      MachineInstr *Next = MI.getNextNode();
      assert(Next && (Next->isReturn() || Next->isDebugInstr()) &&
             "Expected instr following MI to be return or debug inst?");
      // We lowered a tail call, so the call is now the return from the block.
      // Delete the old return.
      Next->eraseFromParent();
    } while (MI.getNextNode());
  }

  return LegalizerHelper::Legalized;
}
639 
640 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
641                                        Type *FromType) {
642   auto ToMVT = MVT::getVT(ToType);
643   auto FromMVT = MVT::getVT(FromType);
644 
645   switch (Opcode) {
646   case TargetOpcode::G_FPEXT:
647     return RTLIB::getFPEXT(FromMVT, ToMVT);
648   case TargetOpcode::G_FPTRUNC:
649     return RTLIB::getFPROUND(FromMVT, ToMVT);
650   case TargetOpcode::G_FPTOSI:
651     return RTLIB::getFPTOSINT(FromMVT, ToMVT);
652   case TargetOpcode::G_FPTOUI:
653     return RTLIB::getFPTOUINT(FromMVT, ToMVT);
654   case TargetOpcode::G_SITOFP:
655     return RTLIB::getSINTTOFP(FromMVT, ToMVT);
656   case TargetOpcode::G_UITOFP:
657     return RTLIB::getUINTTOFP(FromMVT, ToMVT);
658   }
659   llvm_unreachable("Unsupported libcall function");
660 }
661 
662 static LegalizerHelper::LegalizeResult
663 conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
664                   Type *FromType) {
665   RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
666   return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
667                        {{MI.getOperand(1).getReg(), FromType}});
668 }
669 
// Legalize \p MI by replacing it with a runtime-library call chosen from the
// opcode and operand types. On success the original instruction is erased.
LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI) {
  // Size of the destination (operand 0) drives libcall selection for the
  // same-type opcode families below.
  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = LLTy.getSizeInBits();
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // Integer ops: all operands share an iN type of the result width.
    Type *HLTy = IntegerType::get(Ctx, Size);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
    // FP ops: all operands share the float type matching the result width.
    // Only widths with a standard float type and a libcall are supported.
    Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
    if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
      LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
      return UnableToLegalize;
    }
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC: {
    // Float-to-float conversion: both ends must map to standard float types.
    Type *FromTy = getFloatTypeForLLT(Ctx,  MRI.getType(MI.getOperand(1).getReg()));
    Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
    if (!FromTy || !ToTy)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy );
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
        FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
        FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_BZERO:
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET: {
    // Memory intrinsics return early: operand 0 is a pointer, not a result,
    // so the generic erase at the bottom must not be shared.
    LegalizeResult Result =
        createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI);
    if (Result != Legalized)
      return Result;
    MI.eraseFromParent();
    return Result;
  }
  }

  MI.eraseFromParent();
  return Legalized;
}
779 
/// Attempt to legalize \p MI by breaking the type at operand index \p TypeIdx
/// down into pieces of the (smaller) type \p NarrowTy. Most opcodes delegate
/// to a dedicated narrowScalar* helper; a handful are handled inline.
/// Returns UnableToLegalize for unsupported opcodes or unsupported
/// TypeIdx/size combinations.
LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
                                                              unsigned TypeIdx,
                                                              LLT NarrowTy) {
  // Bit widths of the result (operand 0) and of the requested narrow type.
  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_IMPLICIT_DEF: {
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);

    // If SizeOp0 is not an exact multiple of NarrowSize, emit
    // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
    // FIXME: Although this would also be legal for the general case, it causes
    //  a lot of regressions in the emitted code (superfluous COPYs, artifact
    //  combines not being hit). This seems to be a problem related to the
    //  artifact combiner.
    if (SizeOp0 % NarrowSize != 0) {
      LLT ImplicitTy = NarrowTy;
      if (DstTy.isVector())
        ImplicitTy = LLT::vector(DstTy.getNumElements(), ImplicitTy);

      Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
      MIRBuilder.buildAnyExt(DstReg, ImplicitReg);

      MI.eraseFromParent();
      return Legalized;
    }

    int NumParts = SizeOp0 / NarrowSize;

    // Exact multiple: build one undef per piece and recombine.
    SmallVector<Register, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i)
      DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));

    if (DstTy.isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    // Split the constant into NarrowSize-bit pieces (plus a smaller leftover
    // piece if the total width is not an exact multiple), materialize each
    // piece as its own G_CONSTANT, and recombine via insertParts.
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    const APInt &Val = MI.getOperand(1).getCImm()->getValue();
    unsigned TotalSize = Ty.getSizeInBits();
    unsigned NarrowSize = NarrowTy.getSizeInBits();
    int NumParts = TotalSize / NarrowSize;

    SmallVector<Register, 4> PartRegs;
    for (int I = 0; I != NumParts; ++I) {
      unsigned Offset = I * NarrowSize;
      auto K = MIRBuilder.buildConstant(NarrowTy,
                                        Val.lshr(Offset).trunc(NarrowSize));
      PartRegs.push_back(K.getReg(0));
    }

    LLT LeftoverTy;
    unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
    SmallVector<Register, 1> LeftoverRegs;
    if (LeftoverBits != 0) {
      LeftoverTy = LLT::scalar(LeftoverBits);
      auto K = MIRBuilder.buildConstant(
        LeftoverTy,
        Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
      LeftoverRegs.push_back(K.getReg(0));
    }

    insertParts(MI.getOperand(0).getReg(),
                Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    return narrowScalarExt(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_TRUNC: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    // Only the case where the source is exactly two narrow parts is handled:
    // unmerge into halves and keep the low half.
    uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
      LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
      return UnableToLegalize;
    }

    auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
    MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
    MI.eraseFromParent();
    return Legalized;
  }

  case TargetOpcode::G_FREEZE:
    return reduceOperationWidth(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_UMULH:
    return narrowScalarMul(MI, NarrowTy);
  case TargetOpcode::G_EXTRACT:
    return narrowScalarExtract(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_INSERT:
    return narrowScalarInsert(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_LOAD: {
    auto &MMO = **MI.memoperands_begin();
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy.isVector())
      return UnableToLegalize;

    // Memory size differs from the register size: load the narrow value and
    // any-extend it to the full destination width.
    if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
      Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildLoad(TmpReg, MI.getOperand(1), MMO);
      MIRBuilder.buildAnyExt(DstReg, TmpReg);
      MI.eraseFromParent();
      return Legalized;
    }

    return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
  }
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD: {
    bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
    Register DstReg = MI.getOperand(0).getReg();
    Register PtrReg = MI.getOperand(1).getReg();

    // Load into a NarrowTy temporary, then extend to the destination.
    Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
    auto &MMO = **MI.memoperands_begin();
    unsigned MemSize = MMO.getSizeInBits();

    if (MemSize == NarrowSize) {
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
    } else if (MemSize < NarrowSize) {
      MIRBuilder.buildLoadInstr(MI.getOpcode(), TmpReg, PtrReg, MMO);
    } else if (MemSize > NarrowSize) {
      // FIXME: Need to split the load.
      return UnableToLegalize;
    }

    if (ZExt)
      MIRBuilder.buildZExt(DstReg, TmpReg);
    else
      MIRBuilder.buildSExt(DstReg, TmpReg);

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    const auto &MMO = **MI.memoperands_begin();

    Register SrcReg = MI.getOperand(0).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    if (SrcTy.isVector())
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;
    unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
    unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
    // NOTE(review): SrcTy.isVector() was already rejected above, so this
    // check is dead as written.
    if (SrcTy.isVector() && LeftoverBits != 0)
      return UnableToLegalize;

    // Truncating store: shrink the value to NarrowTy before storing.
    if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
      Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      // NOTE(review): shadows the outer MMO; both refer to the same
      // memoperand.
      auto &MMO = **MI.memoperands_begin();
      MIRBuilder.buildTrunc(TmpReg, SrcReg);
      MIRBuilder.buildStore(TmpReg, MI.getOperand(1), MMO);
      MI.eraseFromParent();
      return Legalized;
    }

    return reduceLoadStoreWidth(MI, 0, NarrowTy);
  }
  case TargetOpcode::G_SELECT:
    return narrowScalarSelect(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // Legalize bitwise operation:
    // A = BinOp<Ty> B, C
    // into:
    // B1, ..., BN = G_UNMERGE_VALUES B
    // C1, ..., CN = G_UNMERGE_VALUES C
    // A1 = BinOp<Ty/N> B1, C2
    // ...
    // AN = BinOp<Ty/N> BN, CN
    // A = G_MERGE_VALUES A1, ..., AN
    return narrowScalarBasic(MI, TypeIdx, NarrowTy);
  }
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    return narrowScalarShift(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP:
    // TypeIdx 1 (the source) is narrowed by dedicated helpers; TypeIdx 0
    // narrows only the result, which is zero-extended back to the original
    // destination width below.
    if (TypeIdx == 1)
      switch (MI.getOpcode()) {
      case TargetOpcode::G_CTLZ:
      case TargetOpcode::G_CTLZ_ZERO_UNDEF:
        return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
      case TargetOpcode::G_CTTZ:
      case TargetOpcode::G_CTTZ_ZERO_UNDEF:
        return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
      case TargetOpcode::G_CTPOP:
        return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
      default:
        return UnableToLegalize;
      }

    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INTTOPTR:
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRTOINT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PHI: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    unsigned NumParts = SizeOp0 / NarrowSize;
    SmallVector<Register, 2> DstRegs(NumParts);
    SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
    Observer.changingInstr(MI);
    // Split each incoming value in its predecessor block, right before the
    // terminator.
    for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
      extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
                   SrcRegs[i / 2]);
    }
    // Build one narrow PHI per part, then remerge after the PHI group.
    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, MI);
    for (unsigned i = 0; i < NumParts; ++i) {
      DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
      MachineInstrBuilder MIB =
          MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
      for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
        MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
    }
    MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
    MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
    Observer.changedInstr(MI);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    if (TypeIdx != 2)
      return UnableToLegalize;

    // Only the index operand (the last operand) is narrowed.
    int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, OpIdx);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_ICMP: {
    uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
    if (NarrowSize * 2 != SrcSize)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
    Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2));

    Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
    Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3));

    CmpInst::Predicate Pred =
        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
    LLT ResTy = MRI.getType(MI.getOperand(0).getReg());

    if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
      // EQ/NE can be decided on the whole value at once:
      // (LHSL ^ RHSL) | (LHSH ^ RHSH) compared against zero.
      MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
      MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
      MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
      MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
      MIRBuilder.buildICmp(Pred, MI.getOperand(0), Or, Zero);
    } else {
      // Ordered predicates: compare the high halves; when the highs are
      // equal, fall back to an unsigned compare of the low halves.
      MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
      MachineInstrBuilder CmpHEQ =
          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
      MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
          ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
      MIRBuilder.buildSelect(MI.getOperand(0), CmpHEQ, CmpLU, CmpH);
    }
    Observer.changedInstr(MI);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SEXT_INREG: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    int64_t SizeInBits = MI.getOperand(2).getImm();

    // So long as the new type has more bits than the bits we're extending we
    // don't need to break it apart.
    if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
      Observer.changingInstr(MI);
      // We don't lose any non-extension bits by truncating the src and
      // sign-extending the dst.
      MachineOperand &MO1 = MI.getOperand(1);
      auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
      MO1.setReg(TruncMIB.getReg(0));

      MachineOperand &MO2 = MI.getOperand(0);
      Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
      MIRBuilder.buildSExt(MO2, DstExt);
      MO2.setReg(DstExt);
      Observer.changedInstr(MI);
      return Legalized;
    }

    // Break it apart. Components below the extension point are unmodified. The
    // component containing the extension point becomes a narrower SEXT_INREG.
    // Components above it are ashr'd from the component containing the
    // extension point.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    // List the registers where the destination will be scattered.
    SmallVector<Register, 2> DstRegs;
    // List the registers where the source will be split.
    SmallVector<Register, 2> SrcRegs;

    // Create all the temporary registers.
    for (int i = 0; i < NumParts; ++i) {
      Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);

      SrcRegs.push_back(SrcReg);
    }

    // Explode the big arguments into smaller chunks.
    MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));

    Register AshrCstReg =
        MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
            .getReg(0);
    Register FullExtensionReg = 0;
    Register PartialExtensionReg = 0;

    // Do the operation on each small part.
    for (int i = 0; i < NumParts; ++i) {
      if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
        DstRegs.push_back(SrcRegs[i]);
      else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
        assert(PartialExtensionReg &&
               "Expected to visit partial extension before full");
        if (FullExtensionReg) {
          DstRegs.push_back(FullExtensionReg);
          continue;
        }
        DstRegs.push_back(
            MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
                .getReg(0));
        FullExtensionReg = DstRegs.back();
      } else {
        DstRegs.push_back(
            MIRBuilder
                .buildInstr(
                    TargetOpcode::G_SEXT_INREG, {NarrowTy},
                    {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
                .getReg(0));
        PartialExtensionReg = DstRegs.back();
      }
    }

    // Gather the destination registers into the final destination.
    Register DstReg = MI.getOperand(0).getReg();
    MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_BSWAP:
  case TargetOpcode::G_BITREVERSE: {
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    SmallVector<Register, 2> SrcRegs, DstRegs;
    unsigned NumParts = SizeOp0 / NarrowSize;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    // Apply the operation to each part in reversed order, so the parts
    // themselves are swapped along with the bits/bytes inside them.
    for (unsigned i = 0; i < NumParts; ++i) {
      auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
                                           {SrcRegs[NumParts - 1 - i]});
      DstRegs.push_back(DstPart.getReg(0));
    }

    MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);

    Observer.changedInstr(MI);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_PTRMASK: {
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, 2);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FPTOUI: {
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FPTOSI: {
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FPEXT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
}
1244 
1245 Register LegalizerHelper::coerceToScalar(Register Val) {
1246   LLT Ty = MRI.getType(Val);
1247   if (Ty.isScalar())
1248     return Val;
1249 
1250   const DataLayout &DL = MIRBuilder.getDataLayout();
1251   LLT NewTy = LLT::scalar(Ty.getSizeInBits());
1252   if (Ty.isPointer()) {
1253     if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
1254       return Register();
1255     return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
1256   }
1257 
1258   Register NewVal = Val;
1259 
1260   assert(Ty.isVector());
1261   LLT EltTy = Ty.getElementType();
1262   if (EltTy.isPointer())
1263     NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
1264   return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
1265 }
1266 
1267 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
1268                                      unsigned OpIdx, unsigned ExtOpcode) {
1269   MachineOperand &MO = MI.getOperand(OpIdx);
1270   auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
1271   MO.setReg(ExtB.getReg(0));
1272 }
1273 
1274 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
1275                                       unsigned OpIdx) {
1276   MachineOperand &MO = MI.getOperand(OpIdx);
1277   auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
1278   MO.setReg(ExtB.getReg(0));
1279 }
1280 
1281 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
1282                                      unsigned OpIdx, unsigned TruncOpcode) {
1283   MachineOperand &MO = MI.getOperand(OpIdx);
1284   Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1285   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1286   MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
1287   MO.setReg(DstExt);
1288 }
1289 
1290 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
1291                                       unsigned OpIdx, unsigned ExtOpcode) {
1292   MachineOperand &MO = MI.getOperand(OpIdx);
1293   Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
1294   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1295   MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
1296   MO.setReg(DstTrunc);
1297 }
1298 
1299 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
1300                                             unsigned OpIdx) {
1301   MachineOperand &MO = MI.getOperand(OpIdx);
1302   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1303   MO.setReg(widenWithUnmerge(WideTy, MO.getReg()));
1304 }
1305 
1306 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
1307                                             unsigned OpIdx) {
1308   MachineOperand &MO = MI.getOperand(OpIdx);
1309 
1310   LLT OldTy = MRI.getType(MO.getReg());
1311   unsigned OldElts = OldTy.getNumElements();
1312   unsigned NewElts = MoreTy.getNumElements();
1313 
1314   unsigned NumParts = NewElts / OldElts;
1315 
1316   // Use concat_vectors if the result is a multiple of the number of elements.
1317   if (NumParts * OldElts == NewElts) {
1318     SmallVector<Register, 8> Parts;
1319     Parts.push_back(MO.getReg());
1320 
1321     Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
1322     for (unsigned I = 1; I != NumParts; ++I)
1323       Parts.push_back(ImpDef);
1324 
1325     auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
1326     MO.setReg(Concat.getReg(0));
1327     return;
1328   }
1329 
1330   Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
1331   Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
1332   MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
1333   MO.setReg(MoreReg);
1334 }
1335 
1336 void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1337   MachineOperand &Op = MI.getOperand(OpIdx);
1338   Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
1339 }
1340 
1341 void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1342   MachineOperand &MO = MI.getOperand(OpIdx);
1343   Register CastDst = MRI.createGenericVirtualRegister(CastTy);
1344   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1345   MIRBuilder.buildBitcast(MO, CastDst);
1346   MO.setReg(CastDst);
1347 }
1348 
/// Legalize G_MERGE_VALUES by widening the source type (TypeIdx 1 only) to
/// \p WideTy. When WideTy is at least as wide as the result, the sources are
/// packed directly with zext/shl/or; otherwise the sources are unmerged to
/// the GCD type and remerged in WideTy-sized groups, truncating at the end
/// if the widths don't line up.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
                                        LLT WideTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Src1 = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(Src1);
  const int DstSize = DstTy.getSizeInBits();
  const int SrcSize = SrcTy.getSizeInBits();
  const int WideSize = WideTy.getSizeInBits();
  // Number of WideTy pieces needed to cover the result, rounded up.
  const int NumMerge = (DstSize + WideSize - 1) / WideSize;

  unsigned NumOps = MI.getNumOperands();
  unsigned NumSrc = MI.getNumOperands() - 1;
  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;

  if (WideSize >= DstSize) {
    // The requested type covers the whole result: zero-extend each source,
    // shift it into position, and OR everything together.
    // Directly pack the bits in the target type.
    Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);

    for (unsigned I = 2; I != NumOps; ++I) {
      const unsigned Offset = (I - 1) * PartSize;

      Register SrcReg = MI.getOperand(I).getReg();
      assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));

      auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);

      // Write directly into DstReg on the last OR when no final conversion
      // is needed.
      Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
        MRI.createGenericVirtualRegister(WideTy);

      auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
      auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
      MIRBuilder.buildOr(NextResult, ResultReg, Shl);
      ResultReg = NextResult;
    }

    if (WideSize > DstSize)
      MIRBuilder.buildTrunc(DstReg, ResultReg);
    else if (DstTy.isPointer())
      MIRBuilder.buildIntToPtr(DstReg, ResultReg);

    MI.eraseFromParent();
    return Legalized;
  }

  // Unmerge the original values to the GCD type, and recombine to the next
  // multiple greater than the original type.
  //
  // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
  // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
  // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
  // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
  // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
  // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
  // %12:_(s12) = G_MERGE_VALUES %10, %11
  //
  // Padding with undef if necessary:
  //
  // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
  // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
  // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
  // %7:_(s2) = G_IMPLICIT_DEF
  // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
  // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
  // %10:_(s12) = G_MERGE_VALUES %8, %9

  const int GCD = greatestCommonDivisor(SrcSize, WideSize);
  LLT GCDTy = LLT::scalar(GCD);

  SmallVector<Register, 8> Parts;
  SmallVector<Register, 8> NewMergeRegs;
  SmallVector<Register, 8> Unmerges;
  LLT WideDstTy = LLT::scalar(NumMerge * WideSize);

  // Decompose the original operands if they don't evenly divide.
  for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
    Register SrcReg = MI.getOperand(I).getReg();
    if (GCD == SrcSize) {
      Unmerges.push_back(SrcReg);
    } else {
      auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
      for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
        Unmerges.push_back(Unmerge.getReg(J));
    }
  }

  // Pad with undef to the next size that is a multiple of the requested size.
  // NOTE(review): Unmerges.size() counts GCD-sized registers, but the bound
  // here is in bits (NumMerge * WideSize); when GCD > 1 this pads more undef
  // pieces than the merges below consume. The extras are simply unused, but
  // this looks unintended — confirm against upstream intent.
  if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
    Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
      Unmerges.push_back(UndefReg);
  }

  const int PartsPerGCD = WideSize / GCD;

  // Build merges of each piece.
  ArrayRef<Register> Slicer(Unmerges);
  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
    auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
    NewMergeRegs.push_back(Merge.getReg(0));
  }

  // A truncate may be necessary if the requested type doesn't evenly divide the
  // original result type.
  if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
    MIRBuilder.buildMerge(DstReg, NewMergeRegs);
  } else {
    auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
    MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
  }

  MI.eraseFromParent();
  return Legalized;
}
1470 
1471 Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) {
1472   Register WideReg = MRI.createGenericVirtualRegister(WideTy);
1473   LLT OrigTy = MRI.getType(OrigReg);
1474   LLT LCMTy = getLCMType(WideTy, OrigTy);
1475 
1476   const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits();
1477   const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits();
1478 
1479   Register UnmergeSrc = WideReg;
1480 
1481   // Create a merge to the LCM type, padding with undef
1482   // %0:_(<3 x s32>) = G_FOO => <4 x s32>
1483   // =>
1484   // %1:_(<4 x s32>) = G_FOO
1485   // %2:_(<4 x s32>) = G_IMPLICIT_DEF
1486   // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2
1487   // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3
1488   if (NumMergeParts > 1) {
1489     Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0);
1490     SmallVector<Register, 8> MergeParts(NumMergeParts, Undef);
1491     MergeParts[0] = WideReg;
1492     UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0);
1493   }
1494 
1495   // Unmerge to the original register and pad with dead defs.
1496   SmallVector<Register, 8> UnmergeResults(NumUnmergeParts);
1497   UnmergeResults[0] = OrigReg;
1498   for (int I = 1; I != NumUnmergeParts; ++I)
1499     UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy);
1500 
1501   MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc);
1502   return WideReg;
1503 }
1504 
/// Legalize G_UNMERGE_VALUES by widening the destination type (TypeIdx 0
/// only) to \p WideTy. Scalar sources only. When WideTy covers the whole
/// source, results are extracted with shifts and truncs; otherwise the
/// source is any-extended to the LCM type, unmerged at WideTy, and the
/// pieces are reassembled into the original destinations.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
                                          LLT WideTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  int NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy.isVector())
    return UnableToLegalize;

  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
  if (!DstTy.isScalar())
    return UnableToLegalize;

  if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
    if (SrcTy.isPointer()) {
      const DataLayout &DL = MIRBuilder.getDataLayout();
      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
        LLVM_DEBUG(
            dbgs() << "Not casting non-integral address space integer\n");
        return UnableToLegalize;
      }

      SrcTy = LLT::scalar(SrcTy.getSizeInBits());
      SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
    }

    // Widen SrcTy to WideTy. This does not affect the result, but since the
    // user requested this size, it is probably better handled than SrcTy and
    // should reduce the total number of legalization artifacts
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      SrcTy = WideTy;
      SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
    }

    // There's no unmerge type to target. Directly extract the bits from the
    // source type
    unsigned DstSize = DstTy.getSizeInBits();

    // Result I is bits [I*DstSize, (I+1)*DstSize) of the source: shift right
    // and truncate.
    MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
    for (int I = 1; I != NumDst; ++I) {
      auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
      auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
      MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  // Extend the source to a wider type.
  LLT LCMTy = getLCMType(SrcTy, WideTy);

  Register WideSrc = SrcReg;
  if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
    // TODO: If this is an integral address space, cast to integer and anyext.
    if (SrcTy.isPointer()) {
      LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
      return UnableToLegalize;
    }

    WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
  }

  auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);

  // Create a sequence of unmerges and merges to the original results. Since we
  // may have widened the source, we will need to pad the results with dead defs
  // to cover the source register.
  // e.g. widen s48 to s64:
  // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
  //
  // =>
  //  %4:_(s192) = G_ANYEXT %0:_(s96)
  //  %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
  //  ; unpack to GCD type, with extra dead defs
  //  %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
  //  %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
  //  dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
  //  %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10   ; Remerge to destination
  //  %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
  const LLT GCDTy = getGCDType(WideTy, DstTy);
  const int NumUnmerge = Unmerge->getNumOperands() - 1;
  const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();

  // Directly unmerge to the destination without going through a GCD type
  // if possible
  if (PartsPerRemerge == 1) {
    const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();

    for (int I = 0; I != NumUnmerge; ++I) {
      auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

      for (int J = 0; J != PartsPerUnmerge; ++J) {
        int Idx = I * PartsPerUnmerge + J;
        if (Idx < NumDst)
          MIB.addDef(MI.getOperand(Idx).getReg());
        else {
          // Create dead def for excess components.
          MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
        }
      }

      MIB.addUse(Unmerge.getReg(I));
    }
  } else {
    // Unpack every WideTy piece to the GCD type, then remerge PartsPerRemerge
    // GCD pieces into each destination.
    SmallVector<Register, 16> Parts;
    for (int J = 0; J != NumUnmerge; ++J)
      extractGCDType(Parts, GCDTy, Unmerge.getReg(J));

    SmallVector<Register, 8> RemergeParts;
    for (int I = 0; I != NumDst; ++I) {
      for (int J = 0; J < PartsPerRemerge; ++J) {
        const int Idx = I * PartsPerRemerge + J;
        RemergeParts.emplace_back(Parts[Idx]);
      }

      MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts);
      RemergeParts.clear();
    }
  }

  MI.eraseFromParent();
  return Legalized;
}
1633 
// Widen G_EXTRACT: promote either the result (TypeIdx 0) or the source
// (TypeIdx 1) to \p WideTy. For TypeIdx 0 the extract is rewritten as a
// right shift of the (possibly extended) source followed by a truncate.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                    LLT WideTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  LLT DstTy = MRI.getType(DstReg);
  unsigned Offset = MI.getOperand(2).getImm();

  if (TypeIdx == 0) {
    // Only scalar source and destination are handled when widening the result.
    if (SrcTy.isVector() || DstTy.isVector())
      return UnableToLegalize;

    SrcOp Src(SrcReg);
    if (SrcTy.isPointer()) {
      // Extracts from pointers can be handled only if they are really just
      // simple integers.
      const DataLayout &DL = MIRBuilder.getDataLayout();
      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
        return UnableToLegalize;

      // Reinterpret the pointer as an integer of the same width and extract
      // from that instead.
      LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
      Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
      SrcTy = SrcAsIntTy;
    }

    if (DstTy.isPointer())
      return UnableToLegalize;

    if (Offset == 0) {
      // Avoid a shift in the degenerate case.
      MIRBuilder.buildTrunc(DstReg,
                            MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
      MI.eraseFromParent();
      return Legalized;
    }

    // Do a shift in the source type.
    LLT ShiftTy = SrcTy;
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      Src = MIRBuilder.buildAnyExt(WideTy, Src);
      ShiftTy = WideTy;
    }

    // Shift the desired bits down to bit 0, then truncate to the result type.
    auto LShr = MIRBuilder.buildLShr(
      ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
    MIRBuilder.buildTrunc(DstReg, LShr);
    MI.eraseFromParent();
    return Legalized;
  }

  // TypeIdx == 1: widen the source. A scalar source can simply be
  // any-extended in place.
  if (SrcTy.isScalar()) {
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }

  if (!SrcTy.isVector())
    return UnableToLegalize;

  // Only the case of extracting exactly one element is handled.
  if (DstTy != SrcTy.getElementType())
    return UnableToLegalize;

  // The bit offset must be on an element boundary of the source vector.
  if (Offset % SrcTy.getScalarSizeInBits() != 0)
    return UnableToLegalize;

  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

  // Rescale the bit offset to account for the wider element size.
  MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
                          Offset);
  widenScalarDst(MI, WideTy.getScalarType(), 0);
  Observer.changedInstr(MI);
  return Legalized;
}
1711 
1712 LegalizerHelper::LegalizeResult
1713 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
1714                                    LLT WideTy) {
1715   if (TypeIdx != 0 || WideTy.isVector())
1716     return UnableToLegalize;
1717   Observer.changingInstr(MI);
1718   widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1719   widenScalarDst(MI, WideTy);
1720   Observer.changedInstr(MI);
1721   return Legalized;
1722 }
1723 
// Widen the add/sub-with-overflow (and with-carry) family: perform the
// arithmetic at \p WideTy on sign- or zero-extended operands, then recompute
// the overflow flag by checking whether the wide result survives a
// truncate/re-extend round trip. Only the result type (TypeIdx 0) is handled.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
                                           LLT WideTy) {
  if (TypeIdx == 1)
    return UnableToLegalize; // TODO

  // Pick the wide arithmetic opcode and the extension matching the
  // signedness of the original operation; a carry-in operand, when present,
  // is forwarded unchanged.
  unsigned Opcode;
  unsigned ExtOpcode;
  Optional<Register> CarryIn = None;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case TargetOpcode::G_SADDO:
    Opcode = TargetOpcode::G_ADD;
    ExtOpcode = TargetOpcode::G_SEXT;
    break;
  case TargetOpcode::G_SSUBO:
    Opcode = TargetOpcode::G_SUB;
    ExtOpcode = TargetOpcode::G_SEXT;
    break;
  case TargetOpcode::G_UADDO:
    Opcode = TargetOpcode::G_ADD;
    ExtOpcode = TargetOpcode::G_ZEXT;
    break;
  case TargetOpcode::G_USUBO:
    Opcode = TargetOpcode::G_SUB;
    ExtOpcode = TargetOpcode::G_ZEXT;
    break;
  case TargetOpcode::G_SADDE:
    Opcode = TargetOpcode::G_UADDE;
    ExtOpcode = TargetOpcode::G_SEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_SSUBE:
    Opcode = TargetOpcode::G_USUBE;
    ExtOpcode = TargetOpcode::G_SEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_UADDE:
    Opcode = TargetOpcode::G_UADDE;
    ExtOpcode = TargetOpcode::G_ZEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_USUBE:
    Opcode = TargetOpcode::G_USUBE;
    ExtOpcode = TargetOpcode::G_ZEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  }

  auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
  auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
  // Do the arithmetic in the larger type.
  Register NewOp;
  if (CarryIn) {
    LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
    NewOp = MIRBuilder
                .buildInstr(Opcode, {WideTy, CarryOutTy},
                            {LHSExt, RHSExt, *CarryIn})
                .getReg(0);
  } else {
    NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
  }
  // Truncate the wide result back to the original width and re-extend it.
  LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
  auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
  auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
  // There is no overflow if the ExtOp is the same as NewOp.
  MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
  // Now trunc the NewOp to the original result.
  MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
  MI.eraseFromParent();
  return Legalized;
}
1797 
// Widen the saturating add/sub/shift family by pre-shifting the operands to
// the top of the wide type so that saturation happens at the same bit
// position, then shifting the wide result back down.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
                                         LLT WideTy) {
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
                  MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
                  MI.getOpcode() == TargetOpcode::G_SSHLSAT;
  bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
                 MI.getOpcode() == TargetOpcode::G_USHLSAT;
  // We can convert this to:
  //   1. Any extend iN to iM
  //   2. SHL by M-N
  //   3. [US][ADD|SUB|SHL]SAT
  //   4. L/ASHR by M-N
  //
  // It may be more efficient to lower this to a min and a max operation in
  // the higher precision arithmetic if the promoted operation isn't legal,
  // but this decision is up to the target's lowering request.
  Register DstReg = MI.getOperand(0).getReg();

  unsigned NewBits = WideTy.getScalarSizeInBits();
  unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();

  // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
  // must not left shift the RHS to preserve the shift amount.
  auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
  auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
                     : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
  auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
  auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
  auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);

  // Perform the saturating operation on the pre-shifted wide operands.
  auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
                                        {ShiftL, ShiftR}, MI.getFlags());

  // Use a shift that will preserve the number of sign bits when the trunc is
  // folded away.
  auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
                         : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);

  MIRBuilder.buildTrunc(DstReg, Result);
  MI.eraseFromParent();
  return Legalized;
}
1841 
// Widen G_UMULO/G_SMULO: multiply (with overflow check) at the wider type,
// then derive the original overflow flag. Only the result type (TypeIdx 0)
// is handled.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
                                 LLT WideTy) {
  if (TypeIdx == 1)
    return UnableToLegalize;

  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
  Register Result = MI.getOperand(0).getReg();
  Register OriginalOverflow = MI.getOperand(1).getReg();
  Register LHS = MI.getOperand(2).getReg();
  Register RHS = MI.getOperand(3).getReg();
  LLT SrcTy = MRI.getType(LHS);
  LLT OverflowTy = MRI.getType(OriginalOverflow);
  unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();

  // To determine if the result overflowed in the larger type, we extend the
  // input to the larger type, do the multiply (checking if it overflows),
  // then also check the high bits of the result to see if overflow happened
  // there.
  unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
  auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
  auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});

  auto Mulo = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy, OverflowTy},
                                    {LeftOperand, RightOperand});
  auto Mul = Mulo->getOperand(0);
  MIRBuilder.buildTrunc(Result, Mul);

  MachineInstrBuilder ExtResult;
  // Overflow occurred if it occurred in the larger type, or if the high part
  // of the result does not zero/sign-extend the low part.  Check this second
  // possibility first.
  if (IsSigned) {
    // For signed, overflow occurred when the high part does not sign-extend
    // the low part.
    ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
  } else {
    // Unsigned overflow occurred when the high part does not zero-extend the
    // low part.
    ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
  }

  // Multiplication cannot overflow if the WideTy is >= 2 * original width,
  // so we don't need to check the overflow result of larger type Mulo.
  if (WideTy.getScalarSizeInBits() < 2 * SrcBitWidth) {
    auto Overflow =
        MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
    // Finally check if the multiplication in the larger type itself overflowed.
    MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
  } else {
    MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
  }
  MI.eraseFromParent();
  return Legalized;
}
1897 
// Promote the type indicated by \p TypeIdx of \p MI to the wider type
// \p WideTy, dispatching per opcode. Returns UnableToLegalize for
// opcode/type-index combinations this helper does not know how to widen.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_EXTRACT:
    return widenScalarExtract(MI, TypeIdx, WideTy);
  case TargetOpcode::G_INSERT:
    return widenScalarInsert(MI, TypeIdx, WideTy);
  case TargetOpcode::G_MERGE_VALUES:
    return widenScalarMergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UNMERGE_VALUES:
    return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UMULO:
  case TargetOpcode::G_SMULO:
    return widenScalarMulo(MI, TypeIdx, WideTy);
  case TargetOpcode::G_SADDSAT:
  case TargetOpcode::G_SSUBSAT:
  case TargetOpcode::G_SSHLSAT:
  case TargetOpcode::G_UADDSAT:
  case TargetOpcode::G_USUBSAT:
  case TargetOpcode::G_USHLSAT:
    return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP: {
    if (TypeIdx == 0) {
      // Widening only the result is a simple in-place extension.
      Observer.changingInstr(MI);
      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    Register SrcReg = MI.getOperand(1).getReg();

    // First ZEXT the input.
    auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
    LLT CurTy = MRI.getType(SrcReg);
    if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
      // The count is the same in the larger type except if the original
      // value was zero.  This can be handled by setting the bit just off
      // the top of the original type.
      auto TopBit =
          APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
      MIBSrc = MIRBuilder.buildOr(
        WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
    }

    // Perform the operation at the larger size.
    auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
    // This is already the correct result for CTPOP and CTTZs
    if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
        MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
      // The correct result is NewOp - (Difference in widety and current ty).
      unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
      MIBNewOp = MIRBuilder.buildSub(
          WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
    }

    MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_BSWAP: {
    Observer.changingInstr(MI);
    Register DstReg = MI.getOperand(0).getReg();

    Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
    Register DstExt = MRI.createGenericVirtualRegister(WideTy);
    Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

    MI.getOperand(0).setReg(DstExt);

    // Insert the compensating code after the (now widened) bswap.
    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

    // The swapped bytes end up in the high bits of the wide result; shift
    // them back down before truncating.
    LLT Ty = MRI.getType(DstReg);
    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
    MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
    MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);

    MIRBuilder.buildTrunc(DstReg, ShrReg);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BITREVERSE: {
    Observer.changingInstr(MI);

    Register DstReg = MI.getOperand(0).getReg();
    LLT Ty = MRI.getType(DstReg);
    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();

    Register DstExt = MRI.createGenericVirtualRegister(WideTy);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    MI.getOperand(0).setReg(DstExt);
    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

    // The reversed bits end up in the high bits of the wide result; shift
    // them back down before truncating.
    auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
    auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
    MIRBuilder.buildTrunc(DstReg, Shift);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FREEZE:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ADD:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SUB:
    // Perform operation at larger width (any extension is fine here, high bits
    // don't affect the result) and then truncate the result back to the
    // original type.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SHL:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
    // Signed operations need sign-extended inputs to produce the same result.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      // The extension of the shifted value must match the shift's signedness
      // so the bits shifted in from the top are correct.
      unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
        TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;

      widenScalarSrc(MI, WideTy, 1, CvtOp);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
    // Unsigned operations need zero-extended inputs to produce the same
    // result.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SELECT:
    Observer.changingInstr(MI);
    if (TypeIdx == 0) {
      // Perform operation at larger width (any extension is fine here, high
      // bits don't affect the result) and then truncate the result back to the
      // original type.
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
      // Explicit extension is required here since high bits affect the result.
      widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    Observer.changingInstr(MI);

    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_SITOFP:
    Observer.changingInstr(MI);

    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    else
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UITOFP:
    Observer.changingInstr(MI);

    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    else
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_STORE: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    if (!Ty.isScalar())
      return UnableToLegalize;

    Observer.changingInstr(MI);

    // 1-bit values are zero-extended; everything else is any-extended.
    unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
      TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
    widenScalarSrc(MI, WideTy, 0, ExtType);

    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    // Let the target choose how the constant's immediate is extended.
    unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
        MRI.getType(MI.getOperand(0).getReg()));
    assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
            ExtOpc == TargetOpcode::G_ANYEXT) &&
           "Illegal Extend");
    const APInt &SrcVal = SrcMO.getCImm()->getValue();
    const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
                           ? SrcVal.sext(WideTy.getSizeInBits())
                           : SrcVal.zext(WideTy.getSizeInBits());
    Observer.changingInstr(MI);
    SrcMO.setCImm(ConstantInt::get(Ctx, Val));

    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FCONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    APFloat Val = SrcMO.getFPImm()->getValueAPF();
    bool LosesInfo;
    // Only widening to f32/f64 is supported.
    switch (WideTy.getSizeInBits()) {
    case 32:
      Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
                  &LosesInfo);
      break;
    case 64:
      Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
                  &LosesInfo);
      break;
    default:
      return UnableToLegalize;
    }

    assert(!LosesInfo && "extend should always be lossless");

    Observer.changingInstr(MI);
    SrcMO.setFPImm(ConstantFP::get(Ctx, Val));

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_IMPLICIT_DEF: {
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BRCOND:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FCMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ICMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      // Extend the operands to match the signedness of the predicate.
      unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
                               MI.getOperand(1).getPredicate()))
                               ? TargetOpcode::G_SEXT
                               : TargetOpcode::G_ZEXT;
      widenScalarSrc(MI, WideTy, 2, ExtOpcode);
      widenScalarSrc(MI, WideTy, 3, ExtOpcode);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_PTR_ADD:
    assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_PHI: {
    assert(TypeIdx == 0 && "Expecting only Idx 0");

    // Each incoming value is extended in its predecessor block, before that
    // block's terminators.
    Observer.changingInstr(MI);
    for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
    }

    // The widened destination is truncated back after the PHI group.
    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
    if (TypeIdx == 0) {
      // Widening the result requires widening the vector's element type too.
      Register VecReg = MI.getOperand(1).getReg();
      LLT VecTy = MRI.getType(VecReg);
      Observer.changingInstr(MI);

      widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
                                     WideTy.getSizeInBits()),
                     1, TargetOpcode::G_SEXT);

      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    if (TypeIdx != 2)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    // TODO: Probably should be zext
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    if (TypeIdx == 1) {
      // Widening the inserted element requires widening the vector operand
      // and result as well.
      Observer.changingInstr(MI);

      Register VecReg = MI.getOperand(1).getReg();
      LLT VecTy = MRI.getType(VecReg);
      LLT WideVecTy = LLT::vector(VecTy.getNumElements(), WideTy);

      widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideVecTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    if (TypeIdx == 2) {
      Observer.changingInstr(MI);
      // TODO: Probably should be zext
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
      Observer.changedInstr(MI);
      return Legalized;
    }

    return UnableToLegalize;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FMAD:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FCANONICALIZE:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
    assert(TypeIdx == 0);
    Observer.changingInstr(MI);

    // FP operations: extend every source operand and truncate the result.
    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_FPOWI: {
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_INTTOPTR:
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRTOINT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_BUILD_VECTOR: {
    Observer.changingInstr(MI);

    const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
    for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);

    // Avoid changing the result vector type if the source element type was
    // requested.
    if (TypeIdx == 1) {
      MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
    } else {
      widenScalarDst(MI, WideTy, 0);
    }

    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_SEXT_INREG:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRMASK: {
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  }
}
2423 
2424 static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
2425                              MachineIRBuilder &B, Register Src, LLT Ty) {
2426   auto Unmerge = B.buildUnmerge(Ty, Src);
2427   for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
2428     Pieces.push_back(Unmerge.getReg(I));
2429 }
2430 
/// Lower a G_BITCAST involving at least one vector type by unmerging the
/// source into pieces, bitcasting the pieces where the element sizes differ,
/// and merging them back into the destination type.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitcast(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  if (SrcTy.isVector()) {
    LLT SrcEltTy = SrcTy.getElementType();
    SmallVector<Register, 8> SrcRegs;

    if (DstTy.isVector()) {
      int NumDstElt = DstTy.getNumElements();
      int NumSrcElt = SrcTy.getNumElements();

      LLT DstEltTy = DstTy.getElementType();
      LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
      LLT SrcPartTy = SrcEltTy; // Original unmerge result type.

      // If there's an element size mismatch, insert intermediate casts to match
      // the result element type.
      if (NumSrcElt < NumDstElt) { // Source element type is larger.
        // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
        //
        // =>
        //
        // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
        // %4:_(<2 x s8>) = G_BITCAST %2
        // %5:_(<2 x s8>) = G_BITCAST %3
        // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
        DstCastTy = LLT::vector(NumDstElt / NumSrcElt, DstEltTy);
        SrcPartTy = SrcEltTy;
      } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
        //
        // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
        //
        // =>
        //
        // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
        // %4:_(s16) = G_BITCAST %2
        // %5:_(s16) = G_BITCAST %3
        // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
        SrcPartTy = LLT::vector(NumSrcElt / NumDstElt, SrcEltTy);
        DstCastTy = DstEltTy;
      }

      getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
      for (Register &SrcReg : SrcRegs)
        SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
    } else
      // Scalar destination: unmerge the source vector into its elements and
      // merge them straight into the scalar result.
      getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);

    MIRBuilder.buildMerge(Dst, SrcRegs);
    MI.eraseFromParent();
    return Legalized;
  }

  if (DstTy.isVector()) {
    // Scalar source: split it into destination-element sized pieces and build
    // the result vector from them.
    SmallVector<Register, 8> SrcRegs;
    getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
    MIRBuilder.buildMerge(Dst, SrcRegs);
    MI.eraseFromParent();
    return Legalized;
  }

  // Scalar-to-scalar bitcasts are not handled by this lowering.
  return UnableToLegalize;
}
2498 
2499 /// Figure out the bit offset into a register when coercing a vector index for
2500 /// the wide element type. This is only for the case when promoting vector to
2501 /// one with larger elements.
2502 //
2503 ///
2504 /// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
2505 /// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
2506 static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
2507                                                    Register Idx,
2508                                                    unsigned NewEltSize,
2509                                                    unsigned OldEltSize) {
2510   const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2511   LLT IdxTy = B.getMRI()->getType(Idx);
2512 
2513   // Now figure out the amount we need to shift to get the target bits.
2514   auto OffsetMask = B.buildConstant(
2515     IdxTy, ~(APInt::getAllOnesValue(IdxTy.getSizeInBits()) << Log2EltRatio));
2516   auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
2517   return B.buildShl(IdxTy, OffsetIdx,
2518                     B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
2519 }
2520 
/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
/// is casting to a vector with a smaller element size, perform multiple element
/// extracts and merge the results. If this is coercing to a vector with larger
/// elements, index the bitcasted vector and extract the target element with bit
/// operations. This is intended to force the indexing in the native register
/// size for architectures that can dynamically index the register file.
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
                                         LLT CastTy) {
  // Only the source vector type (type index 1) can be bitcast here.
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register Dst = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register Idx = MI.getOperand(2).getReg();
  LLT SrcVecTy = MRI.getType(SrcVec);
  LLT IdxTy = MRI.getType(Idx);

  LLT SrcEltTy = SrcVecTy.getElementType();
  // A scalar CastTy is treated as a single-element vector throughout.
  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
  unsigned OldNumElts = SrcVecTy.getNumElements();

  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
  // NOTE(review): this bitcast is emitted before the legality checks below, so
  // on the UnableToLegalize paths it is left behind as a dead instruction.
  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);

  const unsigned NewEltSize = NewEltTy.getSizeInBits();
  const unsigned OldEltSize = SrcEltTy.getSizeInBits();
  if (NewNumElts > OldNumElts) {
    // Decreasing the vector element size
    //
    // e.g. i64 = extract_vector_elt x:v2i64, y:i32
    //  =>
    //  v4i32:castx = bitcast x:v2i64
    //
    // i64 = bitcast
    //   (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
    //                       (i32 (extract_vector_elt castx, (2 * y + 1)))
    //
    // Each old element must correspond to a whole number of new elements.
    if (NewNumElts % OldNumElts != 0)
      return UnableToLegalize;

    // Type of the intermediate result vector.
    const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
    LLT MidTy = LLT::scalarOrVector(NewEltsPerOldElt, NewEltTy);

    auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);

    SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
    auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);

    // Extract each narrow piece that together forms the requested element.
    for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
      auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
      auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
      auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
      NewOps[I] = Elt.getReg(0);
    }

    // Reassemble the pieces and cast back to the original element type.
    auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
    MIRBuilder.buildBitcast(Dst, NewVec);
    MI.eraseFromParent();
    return Legalized;
  }

  if (NewNumElts < OldNumElts) {
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;

    // This only depends on powers of 2 because we use bit tricks to figure out
    // the bit offset we need to shift to get the target element. A general
    // expansion could emit division/multiply.
    if (!isPowerOf2_32(NewEltSize / OldEltSize))
      return UnableToLegalize;

    // Increasing the vector element size.
    // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
    //
    //   =>
    //
    // %cast = G_BITCAST %vec
    // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
    // %wide_elt  = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
    // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
    // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
    // %elt_bits = G_LSHR %wide_elt, %offset_bits
    // %elt = G_TRUNC %elt_bits

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);

    // Divide to get the index in the wider element type.
    auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);

    // If CastTy is a scalar, the bitcast result itself is the wide element.
    Register WideElt = CastVec;
    if (CastTy.isVector()) {
      WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
                                                     ScaledIdx).getReg(0);
    }

    // Compute the bit offset into the register of the target element.
    Register OffsetBits = getBitcastWiderVectorElementOffset(
      MIRBuilder, Idx, NewEltSize, OldEltSize);

    // Shift the wide element to get the target element.
    auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
    MIRBuilder.buildTrunc(Dst, ExtractedBits);
    MI.eraseFromParent();
    return Legalized;
  }

  // Equal element counts: nothing useful this transform can do.
  return UnableToLegalize;
}
2632 
2633 /// Emit code to insert \p InsertReg into \p TargetRet at \p OffsetBits in \p
2634 /// TargetReg, while preserving other bits in \p TargetReg.
2635 ///
2636 /// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset)
2637 static Register buildBitFieldInsert(MachineIRBuilder &B,
2638                                     Register TargetReg, Register InsertReg,
2639                                     Register OffsetBits) {
2640   LLT TargetTy = B.getMRI()->getType(TargetReg);
2641   LLT InsertTy = B.getMRI()->getType(InsertReg);
2642   auto ZextVal = B.buildZExt(TargetTy, InsertReg);
2643   auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
2644 
2645   // Produce a bitmask of the value to insert
2646   auto EltMask = B.buildConstant(
2647     TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
2648                                    InsertTy.getSizeInBits()));
2649   // Shift it into position
2650   auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
2651   auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
2652 
2653   // Clear out the bits in the wide element
2654   auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
2655 
2656   // The value to insert has all zeros already, so stick it into the masked
2657   // wide element.
2658   return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
2659 }
2660 
/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
/// is increasing the element size, perform the indexing in the target element
/// type, and use bit operations to insert at the element position. This is
/// intended for architectures that can dynamically index the register file and
/// want to force indexing in the native register size.
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
                                        LLT CastTy) {
  // Only the vector type (type index 0) can be bitcast here.
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register Dst = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register Val = MI.getOperand(2).getReg();
  Register Idx = MI.getOperand(3).getReg();

  LLT VecTy = MRI.getType(Dst);
  LLT IdxTy = MRI.getType(Idx);

  LLT VecEltTy = VecTy.getElementType();
  // A scalar CastTy is treated as a single-element vector throughout.
  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
  const unsigned NewEltSize = NewEltTy.getSizeInBits();
  const unsigned OldEltSize = VecEltTy.getSizeInBits();

  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
  unsigned OldNumElts = VecTy.getNumElements();

  // NOTE(review): this bitcast is emitted before the legality checks below, so
  // on the UnableToLegalize paths it is left behind as a dead instruction.
  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
  if (NewNumElts < OldNumElts) {
    // Casting to wider (and therefore fewer) elements; the new element size
    // must be a whole multiple of the old one.
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;

    // This only depends on powers of 2 because we use bit tricks to figure out
    // the bit offset we need to shift to get the target element. A general
    // expansion could emit division/multiply.
    if (!isPowerOf2_32(NewEltSize / OldEltSize))
      return UnableToLegalize;

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);

    // Divide to get the index in the wider element type.
    auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);

    // If CastTy is a scalar, the bitcast result itself is the wide element.
    Register ExtractedElt = CastVec;
    if (CastTy.isVector()) {
      ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
                                                          ScaledIdx).getReg(0);
    }

    // Compute the bit offset into the register of the target element.
    Register OffsetBits = getBitcastWiderVectorElementOffset(
      MIRBuilder, Idx, NewEltSize, OldEltSize);

    // Splice the new value into the wide element, keeping the other bits.
    Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
                                               Val, OffsetBits);
    if (CastTy.isVector()) {
      InsertedElt = MIRBuilder.buildInsertVectorElement(
        CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
    }

    MIRBuilder.buildBitcast(Dst, InsertedElt);
    MI.eraseFromParent();
    return Legalized;
  }

  // Casting to the same or a narrower element size is not implemented here.
  return UnableToLegalize;
}
2729 
/// Lower G_LOAD, G_SEXTLOAD and G_ZEXTLOAD: split a non-power-of-2 G_LOAD
/// into power-of-2 pieces, or expand an extending load into a memory-width
/// G_LOAD followed by the appropriate extension.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerLoad(MachineInstr &MI) {
  // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
  Register DstReg = MI.getOperand(0).getReg();
  Register PtrReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  auto &MMO = **MI.memoperands_begin();

  // Non-extending case: the register width matches the memory width.
  if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
    if (MI.getOpcode() == TargetOpcode::G_LOAD) {
      // This load needs splitting into power of 2 sized loads.
      if (DstTy.isVector())
        return UnableToLegalize;
      if (isPowerOf2_32(DstTy.getSizeInBits()))
        return UnableToLegalize; // Don't know what we're being asked to do.

      // Our strategy here is to generate anyextending loads for the smaller
      // types up to next power-2 result type, and then combine the two larger
      // result values together, before truncating back down to the non-pow-2
      // type.
      // E.g. v1 = i24 load =>
      // v2 = i32 zextload (2 byte)
      // v3 = i32 load (1 byte)
      // v4 = i32 shl v3, 16
      // v5 = i32 or v4, v2
      // v1 = i24 trunc v5
      // By doing this we generate the correct truncate which should get
      // combined away as an artifact with a matching extend.
      uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
      uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;

      // Split the memory operand: a large low part at offset 0 and a small
      // remainder right after it.
      MachineFunction &MF = MIRBuilder.getMF();
      MachineMemOperand *LargeMMO =
        MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
      MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
        &MMO, LargeSplitSize / 8, SmallSplitSize / 8);

      LLT PtrTy = MRI.getType(PtrReg);
      unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
      LLT AnyExtTy = LLT::scalar(AnyExtSize);
      Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
      Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
      // The low part is zero-extended so the shifted high part can be OR'd in.
      auto LargeLoad = MIRBuilder.buildLoadInstr(
        TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO);

      // Address of the small part: base pointer + LargeSplitSize bytes.
      auto OffsetCst = MIRBuilder.buildConstant(
        LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
      Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
      auto SmallPtr =
        MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
      auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
                                            *SmallMMO);

      // Shift the high part into place, combine, and truncate to the original
      // non-pow-2 width.
      auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
      auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
      auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
      MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
      MI.eraseFromParent();
      return Legalized;
    }

    // An extending load whose memory width already equals the result width
    // degenerates to a plain load.
    MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
    MI.eraseFromParent();
    return Legalized;
  }

  // Extending case: load at the memory width, then extend per the opcode.
  if (DstTy.isScalar()) {
    Register TmpReg =
      MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
    MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode");
    case TargetOpcode::G_LOAD:
      MIRBuilder.buildAnyExtOrTrunc(DstReg, TmpReg);
      break;
    case TargetOpcode::G_SEXTLOAD:
      MIRBuilder.buildSExt(DstReg, TmpReg);
      break;
    case TargetOpcode::G_ZEXTLOAD:
      MIRBuilder.buildZExt(DstReg, TmpReg);
      break;
    }

    MI.eraseFromParent();
    return Legalized;
  }

  // Vector results with a register/memory size mismatch are not handled here.
  return UnableToLegalize;
}
2820 
/// Lower a G_STORE of a non-power-of-2 scalar into two power-of-2 stores.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerStore(MachineInstr &MI) {
  // Lower a non-power of 2 store into multiple pow-2 stores.
  // E.g. split an i24 store into an i16 store + i8 store.
  // We do this by first extending the stored value to the next largest power
  // of 2 type, and then using truncating stores to store the components.
  // By doing this, likewise with G_LOAD, generate an extend that can be
  // artifact-combined away instead of leaving behind extracts.
  Register SrcReg = MI.getOperand(0).getReg();
  Register PtrReg = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  MachineMemOperand &MMO = **MI.memoperands_begin();
  // Only handle non-truncating, scalar, non-pow-2 stores here.
  if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
    return UnableToLegalize;
  if (SrcTy.isVector())
    return UnableToLegalize;
  if (isPowerOf2_32(SrcTy.getSizeInBits()))
    return UnableToLegalize; // Don't know what we're being asked to do.

  // Extend to the next pow-2.
  const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
  auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);

  // Obtain the smaller value by shifting away the larger value.
  uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
  uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
  auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
  auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);

  // Generate the PtrAdd and truncating stores.
  LLT PtrTy = MRI.getType(PtrReg);
  auto OffsetCst = MIRBuilder.buildConstant(
    LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
  Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
  auto SmallPtr =
    MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));

  // Split the memory operand: a large low part at offset 0 and the small
  // remainder right after it.
  MachineFunction &MF = MIRBuilder.getMF();
  MachineMemOperand *LargeMMO =
    MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
  MachineMemOperand *SmallMMO =
    MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
  // The MMO widths make these truncating stores of the low and high pieces.
  MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
  MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
  MI.eraseFromParent();
  return Legalized;
}
2868 
2869 LegalizerHelper::LegalizeResult
2870 LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
2871   switch (MI.getOpcode()) {
2872   case TargetOpcode::G_LOAD: {
2873     if (TypeIdx != 0)
2874       return UnableToLegalize;
2875 
2876     Observer.changingInstr(MI);
2877     bitcastDst(MI, CastTy, 0);
2878     Observer.changedInstr(MI);
2879     return Legalized;
2880   }
2881   case TargetOpcode::G_STORE: {
2882     if (TypeIdx != 0)
2883       return UnableToLegalize;
2884 
2885     Observer.changingInstr(MI);
2886     bitcastSrc(MI, CastTy, 0);
2887     Observer.changedInstr(MI);
2888     return Legalized;
2889   }
2890   case TargetOpcode::G_SELECT: {
2891     if (TypeIdx != 0)
2892       return UnableToLegalize;
2893 
2894     if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
2895       LLVM_DEBUG(
2896           dbgs() << "bitcast action not implemented for vector select\n");
2897       return UnableToLegalize;
2898     }
2899 
2900     Observer.changingInstr(MI);
2901     bitcastSrc(MI, CastTy, 2);
2902     bitcastSrc(MI, CastTy, 3);
2903     bitcastDst(MI, CastTy, 0);
2904     Observer.changedInstr(MI);
2905     return Legalized;
2906   }
2907   case TargetOpcode::G_AND:
2908   case TargetOpcode::G_OR:
2909   case TargetOpcode::G_XOR: {
2910     Observer.changingInstr(MI);
2911     bitcastSrc(MI, CastTy, 1);
2912     bitcastSrc(MI, CastTy, 2);
2913     bitcastDst(MI, CastTy, 0);
2914     Observer.changedInstr(MI);
2915     return Legalized;
2916   }
2917   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
2918     return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
2919   case TargetOpcode::G_INSERT_VECTOR_ELT:
2920     return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
2921   default:
2922     return UnableToLegalize;
2923   }
2924 }
2925 
2926 // Legalize an instruction by changing the opcode in place.
2927 void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
2928     Observer.changingInstr(MI);
2929     MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
2930     Observer.changedInstr(MI);
2931 }
2932 
2933 LegalizerHelper::LegalizeResult
2934 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
2935   using namespace TargetOpcode;
2936 
2937   switch(MI.getOpcode()) {
2938   default:
2939     return UnableToLegalize;
2940   case TargetOpcode::G_BITCAST:
2941     return lowerBitcast(MI);
2942   case TargetOpcode::G_SREM:
2943   case TargetOpcode::G_UREM: {
2944     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2945     auto Quot =
2946         MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
2947                               {MI.getOperand(1), MI.getOperand(2)});
2948 
2949     auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
2950     MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
2951     MI.eraseFromParent();
2952     return Legalized;
2953   }
2954   case TargetOpcode::G_SADDO:
2955   case TargetOpcode::G_SSUBO:
2956     return lowerSADDO_SSUBO(MI);
2957   case TargetOpcode::G_UMULH:
2958   case TargetOpcode::G_SMULH:
2959     return lowerSMULH_UMULH(MI);
2960   case TargetOpcode::G_SMULO:
2961   case TargetOpcode::G_UMULO: {
2962     // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
2963     // result.
2964     Register Res = MI.getOperand(0).getReg();
2965     Register Overflow = MI.getOperand(1).getReg();
2966     Register LHS = MI.getOperand(2).getReg();
2967     Register RHS = MI.getOperand(3).getReg();
2968     LLT Ty = MRI.getType(Res);
2969 
2970     unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
2971                           ? TargetOpcode::G_SMULH
2972                           : TargetOpcode::G_UMULH;
2973 
2974     Observer.changingInstr(MI);
2975     const auto &TII = MIRBuilder.getTII();
2976     MI.setDesc(TII.get(TargetOpcode::G_MUL));
2977     MI.RemoveOperand(1);
2978     Observer.changedInstr(MI);
2979 
2980     auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
2981     auto Zero = MIRBuilder.buildConstant(Ty, 0);
2982 
2983     // Move insert point forward so we can use the Res register if needed.
2984     MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2985 
2986     // For *signed* multiply, overflow is detected by checking:
2987     // (hi != (lo >> bitwidth-1))
2988     if (Opcode == TargetOpcode::G_SMULH) {
2989       auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
2990       auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
2991       MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
2992     } else {
2993       MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
2994     }
2995     return Legalized;
2996   }
2997   case TargetOpcode::G_FNEG: {
2998     Register Res = MI.getOperand(0).getReg();
2999     LLT Ty = MRI.getType(Res);
3000 
3001     // TODO: Handle vector types once we are able to
3002     // represent them.
3003     if (Ty.isVector())
3004       return UnableToLegalize;
3005     auto SignMask =
3006         MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
3007     Register SubByReg = MI.getOperand(1).getReg();
3008     MIRBuilder.buildXor(Res, SubByReg, SignMask);
3009     MI.eraseFromParent();
3010     return Legalized;
3011   }
3012   case TargetOpcode::G_FSUB: {
3013     Register Res = MI.getOperand(0).getReg();
3014     LLT Ty = MRI.getType(Res);
3015 
3016     // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
3017     // First, check if G_FNEG is marked as Lower. If so, we may
3018     // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
3019     if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
3020       return UnableToLegalize;
3021     Register LHS = MI.getOperand(1).getReg();
3022     Register RHS = MI.getOperand(2).getReg();
3023     Register Neg = MRI.createGenericVirtualRegister(Ty);
3024     MIRBuilder.buildFNeg(Neg, RHS);
3025     MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
3026     MI.eraseFromParent();
3027     return Legalized;
3028   }
3029   case TargetOpcode::G_FMAD:
3030     return lowerFMad(MI);
3031   case TargetOpcode::G_FFLOOR:
3032     return lowerFFloor(MI);
3033   case TargetOpcode::G_INTRINSIC_ROUND:
3034     return lowerIntrinsicRound(MI);
3035   case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
3036     // Since round even is the assumed rounding mode for unconstrained FP
3037     // operations, rint and roundeven are the same operation.
3038     changeOpcode(MI, TargetOpcode::G_FRINT);
3039     return Legalized;
3040   }
3041   case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
3042     Register OldValRes = MI.getOperand(0).getReg();
3043     Register SuccessRes = MI.getOperand(1).getReg();
3044     Register Addr = MI.getOperand(2).getReg();
3045     Register CmpVal = MI.getOperand(3).getReg();
3046     Register NewVal = MI.getOperand(4).getReg();
3047     MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
3048                                   **MI.memoperands_begin());
3049     MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
3050     MI.eraseFromParent();
3051     return Legalized;
3052   }
3053   case TargetOpcode::G_LOAD:
3054   case TargetOpcode::G_SEXTLOAD:
3055   case TargetOpcode::G_ZEXTLOAD:
3056     return lowerLoad(MI);
3057   case TargetOpcode::G_STORE:
3058     return lowerStore(MI);
3059   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
3060   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
3061   case TargetOpcode::G_CTLZ:
3062   case TargetOpcode::G_CTTZ:
3063   case TargetOpcode::G_CTPOP:
3064     return lowerBitCount(MI);
3065   case G_UADDO: {
3066     Register Res = MI.getOperand(0).getReg();
3067     Register CarryOut = MI.getOperand(1).getReg();
3068     Register LHS = MI.getOperand(2).getReg();
3069     Register RHS = MI.getOperand(3).getReg();
3070 
3071     MIRBuilder.buildAdd(Res, LHS, RHS);
3072     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
3073 
3074     MI.eraseFromParent();
3075     return Legalized;
3076   }
3077   case G_UADDE: {
3078     Register Res = MI.getOperand(0).getReg();
3079     Register CarryOut = MI.getOperand(1).getReg();
3080     Register LHS = MI.getOperand(2).getReg();
3081     Register RHS = MI.getOperand(3).getReg();
3082     Register CarryIn = MI.getOperand(4).getReg();
3083     LLT Ty = MRI.getType(Res);
3084 
3085     auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
3086     auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
3087     MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
3088     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
3089 
3090     MI.eraseFromParent();
3091     return Legalized;
3092   }
3093   case G_USUBO: {
3094     Register Res = MI.getOperand(0).getReg();
3095     Register BorrowOut = MI.getOperand(1).getReg();
3096     Register LHS = MI.getOperand(2).getReg();
3097     Register RHS = MI.getOperand(3).getReg();
3098 
3099     MIRBuilder.buildSub(Res, LHS, RHS);
3100     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
3101 
3102     MI.eraseFromParent();
3103     return Legalized;
3104   }
3105   case G_USUBE: {
3106     Register Res = MI.getOperand(0).getReg();
3107     Register BorrowOut = MI.getOperand(1).getReg();
3108     Register LHS = MI.getOperand(2).getReg();
3109     Register RHS = MI.getOperand(3).getReg();
3110     Register BorrowIn = MI.getOperand(4).getReg();
3111     const LLT CondTy = MRI.getType(BorrowOut);
3112     const LLT Ty = MRI.getType(Res);
3113 
3114     auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
3115     auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
3116     MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
3117 
3118     auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS);
3119     auto LHS_ULT_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS);
3120     MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
3121 
3122     MI.eraseFromParent();
3123     return Legalized;
3124   }
3125   case G_UITOFP:
3126     return lowerUITOFP(MI);
3127   case G_SITOFP:
3128     return lowerSITOFP(MI);
3129   case G_FPTOUI:
3130     return lowerFPTOUI(MI);
3131   case G_FPTOSI:
3132     return lowerFPTOSI(MI);
3133   case G_FPTRUNC:
3134     return lowerFPTRUNC(MI);
3135   case G_FPOWI:
3136     return lowerFPOWI(MI);
3137   case G_SMIN:
3138   case G_SMAX:
3139   case G_UMIN:
3140   case G_UMAX:
3141     return lowerMinMax(MI);
3142   case G_FCOPYSIGN:
3143     return lowerFCopySign(MI);
3144   case G_FMINNUM:
3145   case G_FMAXNUM:
3146     return lowerFMinNumMaxNum(MI);
3147   case G_MERGE_VALUES:
3148     return lowerMergeValues(MI);
3149   case G_UNMERGE_VALUES:
3150     return lowerUnmergeValues(MI);
3151   case TargetOpcode::G_SEXT_INREG: {
3152     assert(MI.getOperand(2).isImm() && "Expected immediate");
3153     int64_t SizeInBits = MI.getOperand(2).getImm();
3154 
3155     Register DstReg = MI.getOperand(0).getReg();
3156     Register SrcReg = MI.getOperand(1).getReg();
3157     LLT DstTy = MRI.getType(DstReg);
3158     Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
3159 
3160     auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
3161     MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
3162     MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
3163     MI.eraseFromParent();
3164     return Legalized;
3165   }
3166   case G_EXTRACT_VECTOR_ELT:
3167   case G_INSERT_VECTOR_ELT:
3168     return lowerExtractInsertVectorElt(MI);
3169   case G_SHUFFLE_VECTOR:
3170     return lowerShuffleVector(MI);
3171   case G_DYN_STACKALLOC:
3172     return lowerDynStackAlloc(MI);
3173   case G_EXTRACT:
3174     return lowerExtract(MI);
3175   case G_INSERT:
3176     return lowerInsert(MI);
3177   case G_BSWAP:
3178     return lowerBswap(MI);
3179   case G_BITREVERSE:
3180     return lowerBitreverse(MI);
3181   case G_READ_REGISTER:
3182   case G_WRITE_REGISTER:
3183     return lowerReadWriteRegister(MI);
3184   case G_UADDSAT:
3185   case G_USUBSAT: {
3186     // Try to make a reasonable guess about which lowering strategy to use. The
3187     // target can override this with custom lowering and calling the
3188     // implementation functions.
3189     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3190     if (LI.isLegalOrCustom({G_UMIN, Ty}))
3191       return lowerAddSubSatToMinMax(MI);
3192     return lowerAddSubSatToAddoSubo(MI);
3193   }
3194   case G_SADDSAT:
3195   case G_SSUBSAT: {
3196     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3197 
3198     // FIXME: It would probably make more sense to see if G_SADDO is preferred,
3199     // since it's a shorter expansion. However, we would need to figure out the
3200     // preferred boolean type for the carry out for the query.
3201     if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
3202       return lowerAddSubSatToMinMax(MI);
3203     return lowerAddSubSatToAddoSubo(MI);
3204   }
3205   case G_SSHLSAT:
3206   case G_USHLSAT:
3207     return lowerShlSat(MI);
3208   case G_ABS: {
3209     // Expand %res = G_ABS %a into:
3210     // %v1 = G_ASHR %a, scalar_size-1
3211     // %v2 = G_ADD %a, %v1
3212     // %res = G_XOR %v2, %v1
3213     LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3214     Register OpReg = MI.getOperand(1).getReg();
3215     auto ShiftAmt =
3216         MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
3217     auto Shift =
3218         MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
3219     auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
3220     MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
3221     MI.eraseFromParent();
3222     return Legalized;
3223   }
3224   case G_SELECT:
3225     return lowerSelect(MI);
3226   case G_SDIVREM:
3227   case G_UDIVREM:
3228     return lowerDIVREM(MI);
3229   case G_FSHL:
3230   case G_FSHR:
3231     return lowerFunnelShift(MI);
3232   case G_ROTL:
3233   case G_ROTR:
3234     return lowerRotate(MI);
3235   }
3236 }
3237 
3238 Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
3239                                                   Align MinAlign) const {
3240   // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
3241   // datalayout for the preferred alignment. Also there should be a target hook
3242   // for this to allow targets to reduce the alignment and ignore the
3243   // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
3244   // the type.
3245   return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
3246 }
3247 
3248 MachineInstrBuilder
3249 LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
3250                                       MachinePointerInfo &PtrInfo) {
3251   MachineFunction &MF = MIRBuilder.getMF();
3252   const DataLayout &DL = MIRBuilder.getDataLayout();
3253   int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
3254 
3255   unsigned AddrSpace = DL.getAllocaAddrSpace();
3256   LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3257 
3258   PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
3259   return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
3260 }
3261 
3262 static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg,
3263                                         LLT VecTy) {
3264   int64_t IdxVal;
3265   if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal)))
3266     return IdxReg;
3267 
3268   LLT IdxTy = B.getMRI()->getType(IdxReg);
3269   unsigned NElts = VecTy.getNumElements();
3270   if (isPowerOf2_32(NElts)) {
3271     APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
3272     return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
3273   }
3274 
3275   return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
3276       .getReg(0);
3277 }
3278 
3279 Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
3280                                                   Register Index) {
3281   LLT EltTy = VecTy.getElementType();
3282 
3283   // Calculate the element offset and add it to the pointer.
3284   unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
3285   assert(EltSize * 8 == EltTy.getSizeInBits() &&
3286          "Converting bits to bytes lost precision");
3287 
3288   Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy);
3289 
3290   LLT IdxTy = MRI.getType(Index);
3291   auto Mul = MIRBuilder.buildMul(IdxTy, Index,
3292                                  MIRBuilder.buildConstant(IdxTy, EltSize));
3293 
3294   LLT PtrTy = MRI.getType(VecPtr);
3295   return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
3296 }
3297 
3298 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
3299     MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
3300   Register DstReg = MI.getOperand(0).getReg();
3301   LLT DstTy = MRI.getType(DstReg);
3302   LLT LCMTy = getLCMType(DstTy, NarrowTy);
3303 
3304   unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
3305 
3306   auto NewUndef = MIRBuilder.buildUndef(NarrowTy);
3307   SmallVector<Register, 8> Parts(NumParts, NewUndef.getReg(0));
3308 
3309   buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
3310   MI.eraseFromParent();
3311   return Legalized;
3312 }
3313 
3314 // Handle splitting vector operations which need to have the same number of
3315 // elements in each type index, but each type index may have a different element
3316 // type.
3317 //
3318 // e.g.  <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
3319 //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3320 //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3321 //
3322 // Also handles some irregular breakdown cases, e.g.
3323 // e.g.  <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
3324 //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3325 //             s64 = G_SHL s64, s32
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMultiEltType(
  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
  // Only splitting on the result type index is implemented; every operand's
  // narrow type is derived from the result breakdown below.
  if (TypeIdx != 0)
    return UnableToLegalize;

  const LLT NarrowTy0 = NarrowTyArg;
  // Element count of each narrow piece (1 when NarrowTyArg is scalar).
  const unsigned NewNumElts =
      NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;

  const Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT LeftoverTy0;

  // All of the operands need to have the same number of elements, so if we can
  // determine a type breakdown for the result type, we can for all of the
  // source types.
  int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
  if (NumParts < 0)
    return UnableToLegalize;

  // The split instructions are built without inserting them so that operands
  // can be appended one source operand at a time as each source is split.
  SmallVector<MachineInstrBuilder, 4> NewInsts;

  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
  SmallVector<Register, 4> PartRegs, LeftoverRegs;

  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
    Register SrcReg = MI.getOperand(I).getReg();
    LLT SrcTyI = MRI.getType(SrcReg);
    // Same element count as the result pieces, but this operand's scalar type
    // (operands may differ in element type, e.g. shift amounts).
    LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
    LLT LeftoverTyI;

    // Split this operand into the requested typed registers, and any leftover
    // required to reproduce the original type.
    if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
                      LeftoverRegs))
      return UnableToLegalize;

    if (I == 1) {
      // For the first operand, create an instruction for each part and setup
      // the result.
      for (Register PartReg : PartRegs) {
        Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
                               .addDef(PartDstReg)
                               .addUse(PartReg));
        DstRegs.push_back(PartDstReg);
      }

      for (Register LeftoverReg : LeftoverRegs) {
        Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
                               .addDef(PartDstReg)
                               .addUse(LeftoverReg));
        LeftoverDstRegs.push_back(PartDstReg);
      }
    } else {
      assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());

      // Add the newly created operand splits to the existing instructions. The
      // odd-sized pieces are ordered after the requested NarrowTyArg sized
      // pieces.
      unsigned InstCount = 0;
      for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
        NewInsts[InstCount++].addUse(PartRegs[J]);
      for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
        NewInsts[InstCount++].addUse(LeftoverRegs[J]);
    }

    // Reuse the scratch vectors for the next operand.
    PartRegs.clear();
    LeftoverRegs.clear();
  }

  // Insert the newly built operations and rebuild the result register.
  for (auto &MIB : NewInsts)
    MIRBuilder.insertInstr(MIB);

  insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);

  MI.eraseFromParent();
  return Legalized;
}
3408 
3409 LegalizerHelper::LegalizeResult
3410 LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
3411                                           LLT NarrowTy) {
3412   if (TypeIdx != 0)
3413     return UnableToLegalize;
3414 
3415   Register DstReg = MI.getOperand(0).getReg();
3416   Register SrcReg = MI.getOperand(1).getReg();
3417   LLT DstTy = MRI.getType(DstReg);
3418   LLT SrcTy = MRI.getType(SrcReg);
3419 
3420   LLT NarrowTy0 = NarrowTy;
3421   LLT NarrowTy1;
3422   unsigned NumParts;
3423 
3424   if (NarrowTy.isVector()) {
3425     // Uneven breakdown not handled.
3426     NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
3427     if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
3428       return UnableToLegalize;
3429 
3430     NarrowTy1 = LLT::vector(NarrowTy.getNumElements(), SrcTy.getElementType());
3431   } else {
3432     NumParts = DstTy.getNumElements();
3433     NarrowTy1 = SrcTy.getElementType();
3434   }
3435 
3436   SmallVector<Register, 4> SrcRegs, DstRegs;
3437   extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
3438 
3439   for (unsigned I = 0; I < NumParts; ++I) {
3440     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
3441     MachineInstr *NewInst =
3442         MIRBuilder.buildInstr(MI.getOpcode(), {DstReg}, {SrcRegs[I]});
3443 
3444     NewInst->setFlags(MI.getFlags());
3445     DstRegs.push_back(DstReg);
3446   }
3447 
3448   if (NarrowTy.isVector())
3449     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
3450   else
3451     MIRBuilder.buildBuildVector(DstReg, DstRegs);
3452 
3453   MI.eraseFromParent();
3454   return Legalized;
3455 }
3456 
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  // Split a vector G_ICMP/G_FCMP into compares on narrow pieces. TypeIdx 0
  // narrows the result type and TypeIdx 1 narrows the source operand type;
  // the other type's narrow form is derived so element counts stay matched.
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(Src0Reg);

  unsigned NumParts;
  LLT NarrowTy0, NarrowTy1;

  if (TypeIdx == 0) {
    // Narrowing the result: the source piece type keeps the narrow element
    // count but uses the source's element size. A scalar NarrowTy means full
    // scalarization.
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = DstTy.getNumElements();

    NarrowTy0 = NarrowTy;
    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
    NarrowTy1 = NarrowTy.isVector() ?
      LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
      SrcTy.getElementType();

  } else {
    // Narrowing the source: the result piece type keeps the narrow element
    // count but uses the result's element size.
    // NOTE(review): this branch calls NarrowTy.getNumElements() even when
    // NarrowTy is scalar — it appears to assume a vector NarrowTy here;
    // confirm callers never request a scalar with TypeIdx == 1.
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = SrcTy.getNumElements();

    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
      NarrowTy.getNumElements();
    NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
                            DstTy.getScalarSizeInBits());
    NarrowTy1 = NarrowTy;
  }

  // FIXME: Don't know how to handle the situation where the small vectors
  // aren't all the same size yet.
  if (NarrowTy1.isVector() &&
      NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
    return UnableToLegalize;

  CmpInst::Predicate Pred
    = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());

  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);

  // Emit one narrow compare per part.
  for (unsigned I = 0; I < NumParts; ++I) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    DstRegs.push_back(DstReg);

    if (MI.getOpcode() == TargetOpcode::G_ICMP)
      MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
    else {
      // Preserve flags (e.g. fast-math) on the split FP compares.
      MachineInstr *NewCmp
        = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
      NewCmp->setFlags(MI.getFlags());
    }
  }

  // Reassemble the narrow results into the original result register.
  if (NarrowTy1.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
3523 
3524 LegalizerHelper::LegalizeResult
3525 LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
3526                                            LLT NarrowTy) {
3527   Register DstReg = MI.getOperand(0).getReg();
3528   Register CondReg = MI.getOperand(1).getReg();
3529 
3530   unsigned NumParts = 0;
3531   LLT NarrowTy0, NarrowTy1;
3532 
3533   LLT DstTy = MRI.getType(DstReg);
3534   LLT CondTy = MRI.getType(CondReg);
3535   unsigned Size = DstTy.getSizeInBits();
3536 
3537   assert(TypeIdx == 0 || CondTy.isVector());
3538 
3539   if (TypeIdx == 0) {
3540     NarrowTy0 = NarrowTy;
3541     NarrowTy1 = CondTy;
3542 
3543     unsigned NarrowSize = NarrowTy0.getSizeInBits();
3544     // FIXME: Don't know how to handle the situation where the small vectors
3545     // aren't all the same size yet.
3546     if (Size % NarrowSize != 0)
3547       return UnableToLegalize;
3548 
3549     NumParts = Size / NarrowSize;
3550 
3551     // Need to break down the condition type
3552     if (CondTy.isVector()) {
3553       if (CondTy.getNumElements() == NumParts)
3554         NarrowTy1 = CondTy.getElementType();
3555       else
3556         NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
3557                                 CondTy.getScalarSizeInBits());
3558     }
3559   } else {
3560     NumParts = CondTy.getNumElements();
3561     if (NarrowTy.isVector()) {
3562       // TODO: Handle uneven breakdown.
3563       if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
3564         return UnableToLegalize;
3565 
3566       return UnableToLegalize;
3567     } else {
3568       NarrowTy0 = DstTy.getElementType();
3569       NarrowTy1 = NarrowTy;
3570     }
3571   }
3572 
3573   SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
3574   if (CondTy.isVector())
3575     extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
3576 
3577   extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
3578   extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
3579 
3580   for (unsigned i = 0; i < NumParts; ++i) {
3581     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
3582     MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
3583                            Src1Regs[i], Src2Regs[i]);
3584     DstRegs.push_back(DstReg);
3585   }
3586 
3587   if (NarrowTy0.isVector())
3588     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
3589   else
3590     MIRBuilder.buildBuildVector(DstReg, DstRegs);
3591 
3592   MI.eraseFromParent();
3593   return Legalized;
3594 }
3595 
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  // Split a G_PHI into NarrowTy-sized (plus possibly leftover-sized) phis.
  // The new phis are created in the phi's own block; the incoming values are
  // split in each predecessor block, before its terminator.
  const Register DstReg = MI.getOperand(0).getReg();
  LLT PhiTy = MRI.getType(DstReg);
  LLT LeftoverTy;

  // All of the operands need to have the same number of elements, so if we can
  // determine a type breakdown for the result type, we can for all of the
  // source types.
  int NumParts, NumLeftover;
  std::tie(NumParts, NumLeftover)
    = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
  if (NumParts < 0)
    return UnableToLegalize;

  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
  SmallVector<MachineInstrBuilder, 4> NewInsts;

  const int TotalNumParts = NumParts + NumLeftover;

  // Insert the new phis in the result block first.
  for (int I = 0; I != TotalNumParts; ++I) {
    // NarrowTy parts come first, followed by the leftover part(s).
    LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
    Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
    NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
                       .addDef(PartDstReg));
    if (I < NumParts)
      DstRegs.push_back(PartDstReg);
    else
      LeftoverDstRegs.push_back(PartDstReg);
  }

  // Recombine the narrow phi results into the original register. This must be
  // inserted after all phis in the block, hence getFirstNonPHI.
  MachineBasicBlock *MBB = MI.getParent();
  MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
  insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);

  SmallVector<Register, 4> PartRegs, LeftoverRegs;

  // Insert code to extract the incoming values in each predecessor block.
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
    PartRegs.clear();
    LeftoverRegs.clear();

    Register SrcReg = MI.getOperand(I).getReg();
    MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
    // The split of each incoming value must dominate the predecessor's exit.
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());

    LLT Unused;
    if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
                      LeftoverRegs))
      return UnableToLegalize;

    // Add the newly created operand splits to the existing instructions. The
    // odd-sized pieces are ordered after the requested NarrowTyArg sized
    // pieces.
    for (int J = 0; J != TotalNumParts; ++J) {
      MachineInstrBuilder MIB = NewInsts[J];
      MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
      MIB.addMBB(&OpMBB);
    }
  }

  MI.eraseFromParent();
  return Legalized;
}
3662 
3663 LegalizerHelper::LegalizeResult
3664 LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
3665                                                   unsigned TypeIdx,
3666                                                   LLT NarrowTy) {
3667   if (TypeIdx != 1)
3668     return UnableToLegalize;
3669 
3670   const int NumDst = MI.getNumOperands() - 1;
3671   const Register SrcReg = MI.getOperand(NumDst).getReg();
3672   LLT SrcTy = MRI.getType(SrcReg);
3673 
3674   LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3675 
3676   // TODO: Create sequence of extracts.
3677   if (DstTy == NarrowTy)
3678     return UnableToLegalize;
3679 
3680   LLT GCDTy = getGCDType(SrcTy, NarrowTy);
3681   if (DstTy == GCDTy) {
3682     // This would just be a copy of the same unmerge.
3683     // TODO: Create extracts, pad with undef and create intermediate merges.
3684     return UnableToLegalize;
3685   }
3686 
3687   auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
3688   const int NumUnmerge = Unmerge->getNumOperands() - 1;
3689   const int PartsPerUnmerge = NumDst / NumUnmerge;
3690 
3691   for (int I = 0; I != NumUnmerge; ++I) {
3692     auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
3693 
3694     for (int J = 0; J != PartsPerUnmerge; ++J)
3695       MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
3696     MIB.addUse(Unmerge.getReg(I));
3697   }
3698 
3699   MI.eraseFromParent();
3700   return Legalized;
3701 }
3702 
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMulo(MachineInstr &MI, unsigned TypeIdx,
                                         LLT NarrowTy) {
  // Split a vector multiply-with-overflow into operations on GCD-typed
  // pieces, then recombine both the result and the overflow vectors.
  Register Result = MI.getOperand(0).getReg();
  Register Overflow = MI.getOperand(1).getReg();
  Register LHS = MI.getOperand(2).getReg();
  Register RHS = MI.getOperand(3).getReg();

  LLT SrcTy = MRI.getType(LHS);
  if (!SrcTy.isVector())
    return UnableToLegalize;

  LLT ElementType = SrcTy.getElementType();
  LLT OverflowElementTy = MRI.getType(Overflow).getElementType();
  const int NumResult = SrcTy.getNumElements();
  LLT GCDTy = getGCDType(SrcTy, NarrowTy);

  // Unmerge the operands to smaller parts of GCD type.
  auto UnmergeLHS = MIRBuilder.buildUnmerge(GCDTy, LHS);
  auto UnmergeRHS = MIRBuilder.buildUnmerge(GCDTy, RHS);

  // Each unmerge has NumOps defs plus the single source use.
  const int NumOps = UnmergeLHS->getNumOperands() - 1;
  const int PartsPerUnmerge = NumResult / NumOps;
  // Piece types for the result and overflow values; scalar when each piece
  // holds a single element.
  LLT OverflowTy = LLT::scalarOrVector(PartsPerUnmerge, OverflowElementTy);
  LLT ResultTy = LLT::scalarOrVector(PartsPerUnmerge, ElementType);

  // Perform the operation over unmerged parts.
  SmallVector<Register, 8> ResultParts;
  SmallVector<Register, 8> OverflowParts;
  for (int I = 0; I != NumOps; ++I) {
    Register Operand1 = UnmergeLHS->getOperand(I).getReg();
    Register Operand2 = UnmergeRHS->getOperand(I).getReg();
    auto PartMul = MIRBuilder.buildInstr(MI.getOpcode(), {ResultTy, OverflowTy},
                                         {Operand1, Operand2});
    ResultParts.push_back(PartMul->getOperand(0).getReg());
    OverflowParts.push_back(PartMul->getOperand(1).getReg());
  }

  // Merge the result pieces up to the LCM type (padding with undef as
  // needed), and derive the matching widened overflow type.
  LLT ResultLCMTy = buildLCMMergePieces(SrcTy, NarrowTy, GCDTy, ResultParts);
  LLT OverflowLCMTy =
      LLT::scalarOrVector(ResultLCMTy.getNumElements(), OverflowElementTy);

  // Recombine the pieces to the original result and overflow registers.
  buildWidenedRemergeToDst(Result, ResultLCMTy, ResultParts);
  buildWidenedRemergeToDst(Overflow, OverflowLCMTy, OverflowParts);
  MI.eraseFromParent();
  return Legalized;
}
3751 
3752 // Handle FewerElementsVector a G_BUILD_VECTOR or G_CONCAT_VECTORS that produces
3753 // a vector
3754 //
3755 // Create a G_BUILD_VECTOR or G_CONCAT_VECTORS of NarrowTy pieces, padding with
3756 // undef as necessary.
3757 //
3758 // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
3759 //   -> <2 x s16>
3760 //
3761 // %4:_(s16) = G_IMPLICIT_DEF
3762 // %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
3763 // %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
3764 // %7:_(<2 x s16>) = G_IMPLICIT_DEF
3765 // %8:_(<6 x s16>) = G_CONCAT_VECTORS %5, %6, %7
// %3:_(<3 x s16>), %9:_(<3 x s16>) = G_UNMERGE_VALUES %8
3767 LegalizerHelper::LegalizeResult
3768 LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
3769                                           LLT NarrowTy) {
3770   Register DstReg = MI.getOperand(0).getReg();
3771   LLT DstTy = MRI.getType(DstReg);
3772   LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
3773   LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
3774 
3775   // Break into a common type
3776   SmallVector<Register, 16> Parts;
3777   for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3778     extractGCDType(Parts, GCDTy, MI.getOperand(I).getReg());
3779 
3780   // Build the requested new merge, padding with undef.
3781   LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
3782                                   TargetOpcode::G_ANYEXT);
3783 
3784   // Pack into the original result register.
3785   buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
3786 
3787   MI.eraseFromParent();
3788   return Legalized;
3789 }
3790 
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
                                                           unsigned TypeIdx,
                                                           LLT NarrowVecTy) {
  // Split a G_EXTRACT_VECTOR_ELT / G_INSERT_VECTOR_ELT over NarrowVecTy
  // subvectors. With a constant index this reduces to operating on the one
  // piece containing the element; a dynamic index falls back to full lowering.
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register InsertVal;
  bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;

  assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
  if (IsInsert)
    InsertVal = MI.getOperand(2).getReg();

  // The index is always the last operand for both opcodes.
  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();

  // TODO: Handle total scalarization case.
  if (!NarrowVecTy.isVector())
    return UnableToLegalize;

  LLT VecTy = MRI.getType(SrcVec);

  // If the index is a constant, we can really break this down as you would
  // expect, and index into the target size pieces.
  int64_t IdxVal;
  if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
    // Avoid out of bounds indexing the pieces.
    if (IdxVal >= VecTy.getNumElements()) {
      // Out-of-bounds element access is undefined; produce undef.
      MIRBuilder.buildUndef(DstReg);
      MI.eraseFromParent();
      return Legalized;
    }

    SmallVector<Register, 8> VecParts;
    LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);

    // Build a sequence of NarrowTy pieces in VecParts for this operand.
    LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
                                    TargetOpcode::G_ANYEXT);

    unsigned NewNumElts = NarrowVecTy.getNumElements();

    // PartIdx selects the subvector containing the element; NewIdx is the
    // element's position within that subvector.
    LLT IdxTy = MRI.getType(Idx);
    int64_t PartIdx = IdxVal / NewNumElts;
    auto NewIdx =
        MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);

    if (IsInsert) {
      LLT PartTy = MRI.getType(VecParts[PartIdx]);

      // Use the adjusted index to insert into one of the subvectors.
      auto InsertPart = MIRBuilder.buildInsertVectorElement(
          PartTy, VecParts[PartIdx], InsertVal, NewIdx);
      VecParts[PartIdx] = InsertPart.getReg(0);

      // Recombine the inserted subvector with the others to reform the result
      // vector.
      buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
    } else {
      MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  // With a variable index, we can't perform the operation in a smaller type, so
  // we're forced to expand this.
  //
  // TODO: We could emit a chain of compare/select to figure out which piece to
  // index.
  return lowerExtractInsertVectorElt(MI);
}
3863 
LegalizerHelper::LegalizeResult
LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
                                      LLT NarrowTy) {
  // Split a G_LOAD/G_STORE into NarrowTy-sized (plus possibly leftover-sized)
  // memory accesses at increasing byte offsets from the base address.
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  MachineMemOperand *MMO = *MI.memoperands_begin();

  // This implementation doesn't work for atomics. Give up instead of doing
  // something invalid.
  if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
      MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
    return UnableToLegalize;

  bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
  Register ValReg = MI.getOperand(0).getReg();
  Register AddrReg = MI.getOperand(1).getReg();
  LLT ValTy = MRI.getType(ValReg);

  // Bail on extending loads / truncating stores, where the register size
  // differs from the memory size.
  // FIXME: Do we need a distinct NarrowMemory legalize action?
  if (ValTy.getSizeInBits() != 8 * MMO->getSize()) {
    LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
    return UnableToLegalize;
  }

  // Determine the NarrowTy/leftover breakdown. For stores, splitting the
  // value register also produces the registers to store; -1 signals failure.
  int NumParts = -1;
  int NumLeftover = -1;
  LLT LeftoverTy;
  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
  if (IsLoad) {
    std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
  } else {
    if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
                     NarrowLeftoverRegs)) {
      NumParts = NarrowRegs.size();
      NumLeftover = NarrowLeftoverRegs.size();
    }
  }

  if (NumParts == -1)
    return UnableToLegalize;

  // Offsets are materialized as scalars of pointer width.
  LLT PtrTy = MRI.getType(AddrReg);
  const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());

  unsigned TotalSize = ValTy.getSizeInBits();

  // Split the load/store into PartTy sized pieces starting at Offset. If this
  // is a load, return the new registers in ValRegs. For a store, each elements
  // of ValRegs should be PartTy. Returns the next offset that needs to be
  // handled.
  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
                             unsigned Offset) -> unsigned {
    MachineFunction &MF = MIRBuilder.getMF();
    unsigned PartSize = PartTy.getSizeInBits();
    for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
         Offset += PartSize, ++Idx) {
      unsigned ByteSize = PartSize / 8;
      unsigned ByteOffset = Offset / 8;
      Register NewAddrReg;

      MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);

      // Narrow the memory operand to describe only the piece being accessed.
      MachineMemOperand *NewMMO =
        MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);

      if (IsLoad) {
        Register Dst = MRI.createGenericVirtualRegister(PartTy);
        ValRegs.push_back(Dst);
        MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
      } else {
        MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
      }
    }

    return Offset;
  };

  unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);

  // Handle the rest of the register if this isn't an even type breakdown.
  if (LeftoverTy.isValid())
    splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);

  // For loads, recombine the narrow pieces into the original wide register.
  if (IsLoad) {
    insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
                LeftoverTy, NarrowLeftoverRegs);
  }

  MI.eraseFromParent();
  return Legalized;
}
3957 
// Break an operation with one result and 1-3 sources into NarrowTy-sized
// pieces. Handles both breakdown flavors: "narrowing" (NarrowTy has a
// different, smaller scalar element type than the result) and
// "fewerElements" (NarrowTy is a shorter vector with the same element type).
LegalizerHelper::LegalizeResult
LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned int TypeIdx,
                                      LLT NarrowTy) {
  assert(TypeIdx == 0 && "only one type index expected");

  const unsigned Opc = MI.getOpcode();
  const int NumOps = MI.getNumOperands() - 1;
  const Register DstReg = MI.getOperand(0).getReg();
  const unsigned Flags = MI.getFlags();
  const unsigned NarrowSize = NarrowTy.getSizeInBits();
  const LLT NarrowScalarTy = LLT::scalar(NarrowSize);

  assert(NumOps <= 3 && "expected instruction with 1 result and 1-3 sources");

  // First of all check whether we are narrowing (changing the element type)
  // or reducing the vector elements
  const LLT DstTy = MRI.getType(DstReg);
  const bool IsNarrow = NarrowTy.getScalarType() != DstTy.getScalarType();

  // Per-source lists of NarrowTy-sized pieces feeding the sub-instructions.
  SmallVector<Register, 8> ExtractedRegs[3];
  SmallVector<Register, 8> Parts;

  unsigned NarrowElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;

  // Break down all the sources into NarrowTy pieces we can operate on. This may
  // involve creating merges to a wider type, padded with undef.
  for (int I = 0; I != NumOps; ++I) {
    Register SrcReg = MI.getOperand(I + 1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);

    // The type to narrow SrcReg to. For narrowing, this is a smaller scalar.
    // For fewerElements, this is a smaller vector with the same element type.
    LLT OpNarrowTy;
    if (IsNarrow) {
      OpNarrowTy = NarrowScalarTy;

      // In case of narrowing, we need to cast vectors to scalars for this to
      // work properly
      // FIXME: Can we do without the bitcast here if we're narrowing?
      if (SrcTy.isVector()) {
        SrcTy = LLT::scalar(SrcTy.getSizeInBits());
        SrcReg = MIRBuilder.buildBitcast(SrcTy, SrcReg).getReg(0);
      }
    } else {
      OpNarrowTy = LLT::scalarOrVector(NarrowElts, SrcTy.getScalarType());
    }

    LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, OpNarrowTy, SrcReg);

    // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand.
    buildLCMMergePieces(SrcTy, OpNarrowTy, GCDTy, ExtractedRegs[I],
                        TargetOpcode::G_ANYEXT);
  }

  SmallVector<Register, 8> ResultRegs;

  // Input operands for each sub-instruction.
  SmallVector<SrcOp, 4> InputRegs(NumOps, Register());

  int NumParts = ExtractedRegs[0].size();
  const unsigned DstSize = DstTy.getSizeInBits();
  const LLT DstScalarTy = LLT::scalar(DstSize);

  // Narrowing needs to use scalar types
  LLT DstLCMTy, NarrowDstTy;
  if (IsNarrow) {
    DstLCMTy = getLCMType(DstScalarTy, NarrowScalarTy);
    NarrowDstTy = NarrowScalarTy;
  } else {
    DstLCMTy = getLCMType(DstTy, NarrowTy);
    NarrowDstTy = NarrowTy;
  }

  // We widened the source registers to satisfy merge/unmerge size
  // constraints. We'll have some extra fully undef parts.
  const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize;

  for (int I = 0; I != NumRealParts; ++I) {
    // Emit this instruction on each of the split pieces.
    for (int J = 0; J != NumOps; ++J)
      InputRegs[J] = ExtractedRegs[J][I];

    auto Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy}, InputRegs, Flags);
    ResultRegs.push_back(Inst.getReg(0));
  }

  // Fill out the widened result with undef instead of creating instructions
  // with undef inputs.
  int NumUndefParts = NumParts - NumRealParts;
  if (NumUndefParts != 0)
    ResultRegs.append(NumUndefParts,
                      MIRBuilder.buildUndef(NarrowDstTy).getReg(0));

  // Extract the possibly padded result. Use a scratch register if we need to do
  // a final bitcast, otherwise use the original result register.
  Register MergeDstReg;
  if (IsNarrow && DstTy.isVector())
    MergeDstReg = MRI.createGenericVirtualRegister(DstScalarTy);
  else
    MergeDstReg = DstReg;

  buildWidenedRemergeToDst(MergeDstReg, DstLCMTy, ResultRegs);

  // Recast to vector if we narrowed a vector
  if (IsNarrow && DstTy.isVector())
    MIRBuilder.buildBitcast(DstReg, MergeDstReg);

  MI.eraseFromParent();
  return Legalized;
}
4068 
4069 LegalizerHelper::LegalizeResult
4070 LegalizerHelper::fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx,
4071                                               LLT NarrowTy) {
4072   Register DstReg = MI.getOperand(0).getReg();
4073   Register SrcReg = MI.getOperand(1).getReg();
4074   int64_t Imm = MI.getOperand(2).getImm();
4075 
4076   LLT DstTy = MRI.getType(DstReg);
4077 
4078   SmallVector<Register, 8> Parts;
4079   LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
4080   LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts);
4081 
4082   for (Register &R : Parts)
4083     R = MIRBuilder.buildSExtInReg(NarrowTy, R, Imm).getReg(0);
4084 
4085   buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
4086 
4087   MI.eraseFromParent();
4088   return Legalized;
4089 }
4090 
// Dispatch to the opcode-specific "fewer vector elements" implementation for
// \p MI, producing operations on the smaller vector (or scalar) type
// \p NarrowTy.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  using namespace TargetOpcode;

  switch (MI.getOpcode()) {
  case G_IMPLICIT_DEF:
    return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
  // Operations with one result and 1-3 sources, all sharing the result's
  // breakdown; handled generically by reduceOperationWidth.
  case G_TRUNC:
  case G_AND:
  case G_OR:
  case G_XOR:
  case G_ADD:
  case G_SUB:
  case G_MUL:
  case G_PTR_ADD:
  case G_SMULH:
  case G_UMULH:
  case G_FADD:
  case G_FMUL:
  case G_FSUB:
  case G_FNEG:
  case G_FABS:
  case G_FCANONICALIZE:
  case G_FDIV:
  case G_FREM:
  case G_FMA:
  case G_FMAD:
  case G_FPOW:
  case G_FEXP:
  case G_FEXP2:
  case G_FLOG:
  case G_FLOG2:
  case G_FLOG10:
  case G_FNEARBYINT:
  case G_FCEIL:
  case G_FFLOOR:
  case G_FRINT:
  case G_INTRINSIC_ROUND:
  case G_INTRINSIC_ROUNDEVEN:
  case G_INTRINSIC_TRUNC:
  case G_FCOS:
  case G_FSIN:
  case G_FSQRT:
  case G_BSWAP:
  case G_BITREVERSE:
  case G_SDIV:
  case G_UDIV:
  case G_SREM:
  case G_UREM:
  case G_SMIN:
  case G_SMAX:
  case G_UMIN:
  case G_UMAX:
  case G_FMINNUM:
  case G_FMAXNUM:
  case G_FMINNUM_IEEE:
  case G_FMAXNUM_IEEE:
  case G_FMINIMUM:
  case G_FMAXIMUM:
  case G_FSHL:
  case G_FSHR:
  case G_FREEZE:
  case G_SADDSAT:
  case G_SSUBSAT:
  case G_UADDSAT:
  case G_USUBSAT:
    return reduceOperationWidth(MI, TypeIdx, NarrowTy);
  case G_UMULO:
  case G_SMULO:
    return fewerElementsVectorMulo(MI, TypeIdx, NarrowTy);
  // Operations whose operands may have more than one distinct type (e.g.
  // shift amount, count result) need the multi-element-type handling.
  case G_SHL:
  case G_LSHR:
  case G_ASHR:
  case G_SSHLSAT:
  case G_USHLSAT:
  case G_CTLZ:
  case G_CTLZ_ZERO_UNDEF:
  case G_CTTZ:
  case G_CTTZ_ZERO_UNDEF:
  case G_CTPOP:
  case G_FCOPYSIGN:
    return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
  // Conversions: source and destination element types differ.
  case G_ZEXT:
  case G_SEXT:
  case G_ANYEXT:
  case G_FPEXT:
  case G_FPTRUNC:
  case G_SITOFP:
  case G_UITOFP:
  case G_FPTOSI:
  case G_FPTOUI:
  case G_INTTOPTR:
  case G_PTRTOINT:
  case G_ADDRSPACE_CAST:
    return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
  case G_ICMP:
  case G_FCMP:
    return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
  case G_SELECT:
    return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
  case G_PHI:
    return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
  case G_UNMERGE_VALUES:
    return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
  case G_BUILD_VECTOR:
    assert(TypeIdx == 0 && "not a vector type index");
    return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
  case G_CONCAT_VECTORS:
    if (TypeIdx != 1) // TODO: This probably does work as expected already.
      return UnableToLegalize;
    return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
  case G_EXTRACT_VECTOR_ELT:
  case G_INSERT_VECTOR_ELT:
    return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
  case G_LOAD:
  case G_STORE:
    return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
  case G_SEXT_INREG:
    return fewerElementsVectorSextInReg(MI, TypeIdx, NarrowTy);
  GISEL_VECREDUCE_CASES_NONSEQ
    return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
  default:
    return UnableToLegalize;
  }
}
4217 
4218 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
4219     MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4220   unsigned Opc = MI.getOpcode();
4221   assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&
4222          Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
4223          "Sequential reductions not expected");
4224 
4225   if (TypeIdx != 1)
4226     return UnableToLegalize;
4227 
4228   // The semantics of the normal non-sequential reductions allow us to freely
4229   // re-associate the operation.
4230   Register SrcReg = MI.getOperand(1).getReg();
4231   LLT SrcTy = MRI.getType(SrcReg);
4232   Register DstReg = MI.getOperand(0).getReg();
4233   LLT DstTy = MRI.getType(DstReg);
4234 
4235   if (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0)
4236     return UnableToLegalize;
4237 
4238   SmallVector<Register> SplitSrcs;
4239   const unsigned NumParts = SrcTy.getNumElements() / NarrowTy.getNumElements();
4240   extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
4241   SmallVector<Register> PartialReductions;
4242   for (unsigned Part = 0; Part < NumParts; ++Part) {
4243     PartialReductions.push_back(
4244         MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0));
4245   }
4246 
4247   unsigned ScalarOpc;
4248   switch (Opc) {
4249   case TargetOpcode::G_VECREDUCE_FADD:
4250     ScalarOpc = TargetOpcode::G_FADD;
4251     break;
4252   case TargetOpcode::G_VECREDUCE_FMUL:
4253     ScalarOpc = TargetOpcode::G_FMUL;
4254     break;
4255   case TargetOpcode::G_VECREDUCE_FMAX:
4256     ScalarOpc = TargetOpcode::G_FMAXNUM;
4257     break;
4258   case TargetOpcode::G_VECREDUCE_FMIN:
4259     ScalarOpc = TargetOpcode::G_FMINNUM;
4260     break;
4261   case TargetOpcode::G_VECREDUCE_ADD:
4262     ScalarOpc = TargetOpcode::G_ADD;
4263     break;
4264   case TargetOpcode::G_VECREDUCE_MUL:
4265     ScalarOpc = TargetOpcode::G_MUL;
4266     break;
4267   case TargetOpcode::G_VECREDUCE_AND:
4268     ScalarOpc = TargetOpcode::G_AND;
4269     break;
4270   case TargetOpcode::G_VECREDUCE_OR:
4271     ScalarOpc = TargetOpcode::G_OR;
4272     break;
4273   case TargetOpcode::G_VECREDUCE_XOR:
4274     ScalarOpc = TargetOpcode::G_XOR;
4275     break;
4276   case TargetOpcode::G_VECREDUCE_SMAX:
4277     ScalarOpc = TargetOpcode::G_SMAX;
4278     break;
4279   case TargetOpcode::G_VECREDUCE_SMIN:
4280     ScalarOpc = TargetOpcode::G_SMIN;
4281     break;
4282   case TargetOpcode::G_VECREDUCE_UMAX:
4283     ScalarOpc = TargetOpcode::G_UMAX;
4284     break;
4285   case TargetOpcode::G_VECREDUCE_UMIN:
4286     ScalarOpc = TargetOpcode::G_UMIN;
4287     break;
4288   default:
4289     LLVM_DEBUG(dbgs() << "Can't legalize: unknown reduction kind.\n");
4290     return UnableToLegalize;
4291   }
4292 
4293   // If the types involved are powers of 2, we can generate intermediate vector
4294   // ops, before generating a final reduction operation.
4295   if (isPowerOf2_32(SrcTy.getNumElements()) &&
4296       isPowerOf2_32(NarrowTy.getNumElements())) {
4297     return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
4298   }
4299 
4300   Register Acc = PartialReductions[0];
4301   for (unsigned Part = 1; Part < NumParts; ++Part) {
4302     if (Part == NumParts - 1) {
4303       MIRBuilder.buildInstr(ScalarOpc, {DstReg},
4304                             {Acc, PartialReductions[Part]});
4305     } else {
4306       Acc = MIRBuilder
4307                 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
4308                 .getReg(0);
4309     }
4310   }
4311   MI.eraseFromParent();
4312   return Legalized;
4313 }
4314 
4315 LegalizerHelper::LegalizeResult
4316 LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
4317                                         LLT SrcTy, LLT NarrowTy,
4318                                         unsigned ScalarOpc) {
4319   SmallVector<Register> SplitSrcs;
4320   // Split the sources into NarrowTy size pieces.
4321   extractParts(SrcReg, NarrowTy,
4322                SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs);
4323   // We're going to do a tree reduction using vector operations until we have
4324   // one NarrowTy size value left.
4325   while (SplitSrcs.size() > 1) {
4326     SmallVector<Register> PartialRdxs;
4327     for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
4328       Register LHS = SplitSrcs[Idx];
4329       Register RHS = SplitSrcs[Idx + 1];
4330       // Create the intermediate vector op.
4331       Register Res =
4332           MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
4333       PartialRdxs.push_back(Res);
4334     }
4335     SplitSrcs = std::move(PartialRdxs);
4336   }
4337   // Finally generate the requested NarrowTy based reduction.
4338   Observer.changingInstr(MI);
4339   MI.getOperand(1).setReg(SplitSrcs[0]);
4340   Observer.changedInstr(MI);
4341   return Legalized;
4342 }
4343 
// Expand a wide G_SHL/G_LSHR/G_ASHR whose shift amount \p Amt is a known
// constant. The input is split into HalfTy-sized low/high halves (InL/InH)
// and the result halves (Lo/Hi) are computed directly from the constant,
// avoiding the select-based general expansion.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
                                             const LLT HalfTy, const LLT AmtTy) {

  // Split the shifted value into its low and high halves.
  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));

  // Shift by zero: just re-merge the halves unchanged.
  if (Amt.isNullValue()) {
    MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH});
    MI.eraseFromParent();
    return Legalized;
  }

  LLT NVT = HalfTy;
  unsigned NVTBits = HalfTy.getSizeInBits(); // Bits per half.
  unsigned VTBits = 2 * NVTBits;             // Bits in the full value.

  SrcOp Lo(Register(0)), Hi(Register(0));
  if (MI.getOpcode() == TargetOpcode::G_SHL) {
    if (Amt.ugt(VTBits)) {
      // Shifting out more than the full width: both halves become zero.
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      // The low half shifts entirely into the high half.
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = MIRBuilder.buildShl(NVT, InL,
                               MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
    } else if (Amt == NVTBits) {
      // Shift by exactly one half-width: Hi is the old low half.
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = InL;
    } else {
      // The shift straddles the half boundary: Hi combines bits shifted out
      // of the low half with the shifted high half.
      Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrLHS =
          MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrRHS = MIRBuilder.buildLShr(
          NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
      Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
    }
  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    if (Amt.ugt(VTBits)) {
      // Shifting out more than the full width: both halves become zero.
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      // The high half shifts entirely into the low half.
      Lo = MIRBuilder.buildLShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt == NVTBits) {
      // Shift by exactly one half-width: Lo is the old high half.
      Lo = InH;
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else {
      // Straddling shift: Lo combines bits from both halves.
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
    }
  } else {
    // G_ASHR: like G_LSHR, but the high half fills with the sign bit
    // (ashr by NVTBits - 1) instead of zero.
    if (Amt.ugt(VTBits)) {
      Hi = Lo = MIRBuilder.buildAShr(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt.ugt(NVTBits)) {
      Lo = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt == NVTBits) {
      Lo = InH;
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else {
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
    }
  }

  // Recombine the computed halves into the destination.
  MIRBuilder.buildMerge(MI.getOperand(0), {Lo, Hi});
  MI.eraseFromParent();

  return Legalized;
}
4431 
// TODO: Optimize if constant shift amount.
// Narrow a scalar G_SHL/G_LSHR/G_ASHR by splitting the value into two
// half-width registers, then selecting at runtime between the "short"
// (amount < half width) and "long" (amount >= half width) expansions.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
                                   LLT RequestedTy) {
  // TypeIdx == 1 requests narrowing only the shift-amount operand.
  if (TypeIdx == 1) {
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, RequestedTy, 2);
    Observer.changedInstr(MI);
    return Legalized;
  }

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Amt = MI.getOperand(2).getReg();
  LLT ShiftAmtTy = MRI.getType(Amt);
  const unsigned DstEltSize = DstTy.getScalarSizeInBits();
  if (DstEltSize % 2 != 0)
    return UnableToLegalize;

  // Ignore the input type. We can only go to exactly half the size of the
  // input. If that isn't small enough, the resulting pieces will be further
  // legalized.
  const unsigned NewBitSize = DstEltSize / 2;
  const LLT HalfTy = LLT::scalar(NewBitSize);
  const LLT CondTy = LLT::scalar(1);

  // A known-constant shift amount admits a much simpler expansion.
  if (const MachineInstr *KShiftAmt =
          getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
    return narrowScalarShiftByConstant(
        MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
  }

  // TODO: Expand with known bits.

  // Handle the fully general expansion by an unknown amount.
  auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);

  // Split the input into its low and high halves.
  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));

  // AmtExcess = Amt - NewBitSize (shift amount within the "far" half for long
  // shifts); AmtLack = NewBitSize - Amt (bits left over for short shifts).
  auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
  auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);

  auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
  // IsShort: the shift stays within one half. IsZero: no shifting at all.
  auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);

  Register ResultRegs[2];
  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL: {
    // Short: ShAmt < NewBitSize
    auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
    auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    auto LoL = MIRBuilder.buildConstant(HalfTy, 0);         // Lo part is zero.
    auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.

    // Select the correct halves; a zero amount must produce the inputs
    // unchanged (the short expansion's AmtLack would be out of range).
    auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
    auto Hi = MIRBuilder.buildSelect(
        HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    // Short: ShAmt < NewBitSize
    auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
    auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    MachineInstrBuilder HiL;
    if (MI.getOpcode() == TargetOpcode::G_LSHR) {
      HiL = MIRBuilder.buildConstant(HalfTy, 0);            // Hi part is zero.
    } else {
      auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
      HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt);    // Sign of Hi part.
    }
    auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
                                     {InH, AmtExcess});     // Lo from Hi part.

    auto Lo = MIRBuilder.buildSelect(
        HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));

    auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  default:
    llvm_unreachable("not a shift");
  }

  MIRBuilder.buildMerge(DstReg, ResultRegs);
  MI.eraseFromParent();
  return Legalized;
}
4542 
4543 LegalizerHelper::LegalizeResult
4544 LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
4545                                        LLT MoreTy) {
4546   assert(TypeIdx == 0 && "Expecting only Idx 0");
4547 
4548   Observer.changingInstr(MI);
4549   for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
4550     MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
4551     MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
4552     moreElementsVectorSrc(MI, MoreTy, I);
4553   }
4554 
4555   MachineBasicBlock &MBB = *MI.getParent();
4556   MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
4557   moreElementsVectorDst(MI, MoreTy, 0);
4558   Observer.changedInstr(MI);
4559   return Legalized;
4560 }
4561 
// Legalize \p MI by widening the vector type at \p TypeIdx to \p MoreTy,
// delegating the per-operand padding to the moreElementsVectorSrc /
// moreElementsVectorDst helpers as appropriate for each opcode.
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                    LLT MoreTy) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_IMPLICIT_DEF:
  case TargetOpcode::G_LOAD: {
    // Only the result needs widening.
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_STORE:
    // Widen the stored value (operand 0 of a store).
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM: {
    // Binary ops: widen both sources and the result.
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT:
    // Only the source vector (type index 1) is widened.
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INSERT:
  case TargetOpcode::G_FREEZE:
    // Widen the first source and the result together.
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_SELECT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    // Vector conditions are not handled here.
    if (MRI.getType(MI.getOperand(1).getReg()).isVector())
      return UnableToLegalize;

    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorSrc(MI, MoreTy, 3);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UNMERGE_VALUES: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    // Widen the source, then rebuild the unmerge with extra (dead) defs of
    // the original piece type to cover the padding.
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    int NumDst = MI.getNumOperands() - 1;
    moreElementsVectorSrc(MI, MoreTy, NumDst);

    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
    for (int I = 0; I != NumDst; ++I)
      MIB.addDef(MI.getOperand(I).getReg());

    int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits();
    for (int I = NumDst; I != NewNumDst; ++I)
      MIB.addDef(MRI.createGenericVirtualRegister(DstTy));

    MIB.addUse(MI.getOperand(NumDst).getReg());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_PHI:
    return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
  default:
    return UnableToLegalize;
  }
}
4657 
// Compute DstRegs = Src1Regs * Src2Regs via schoolbook long multiplication
// on NarrowTy-sized parts, where index 0 holds the least significant part
// (per the "Low bits of the result" seed below). Only DstRegs.size() result
// parts are produced; higher parts of the full product are not computed.
void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
                                        ArrayRef<Register> Src1Regs,
                                        ArrayRef<Register> Src2Regs,
                                        LLT NarrowTy) {
  MachineIRBuilder &B = MIRBuilder;
  unsigned SrcParts = Src1Regs.size();
  unsigned DstParts = DstRegs.size();

  unsigned DstIdx = 0; // Low bits of the result.
  Register FactorSum =
      B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
  DstRegs[DstIdx] = FactorSum;

  // Carry accumulated while summing the factors of the previous result index;
  // it feeds into the next index's sum.
  unsigned CarrySumPrevDstIdx;
  // All NarrowTy values contributing to the current result part.
  SmallVector<Register, 4> Factors;

  for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
    // Collect low parts of muls for DstIdx.
    for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
         i <= std::min(DstIdx, SrcParts - 1); ++i) {
      MachineInstrBuilder Mul =
          B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
      Factors.push_back(Mul.getReg(0));
    }
    // Collect high parts of muls from previous DstIdx.
    for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
         i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
      MachineInstrBuilder Umulh =
          B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
      Factors.push_back(Umulh.getReg(0));
    }
    // Add CarrySum from additions calculated for previous DstIdx.
    if (DstIdx != 1) {
      Factors.push_back(CarrySumPrevDstIdx);
    }

    Register CarrySum;
    // Add all factors and accumulate all carries into CarrySum.
    if (DstIdx != DstParts - 1) {
      MachineInstrBuilder Uaddo =
          B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
      FactorSum = Uaddo.getReg(0);
      CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i) {
        MachineInstrBuilder Uaddo =
            B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
        FactorSum = Uaddo.getReg(0);
        MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
        CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
      }
    } else {
      // Since value for the next index is not calculated, neither is CarrySum.
      FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i)
        FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
    }

    CarrySumPrevDstIdx = CarrySum;
    DstRegs[DstIdx] = FactorSum;
    Factors.clear();
  }
}
4720 
4721 LegalizerHelper::LegalizeResult
4722 LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
4723                                     LLT NarrowTy) {
4724   if (TypeIdx != 0)
4725     return UnableToLegalize;
4726 
4727   Register DstReg = MI.getOperand(0).getReg();
4728   LLT DstType = MRI.getType(DstReg);
4729   // FIXME: add support for vector types
4730   if (DstType.isVector())
4731     return UnableToLegalize;
4732 
4733   uint64_t SizeOp0 = DstType.getSizeInBits();
4734   uint64_t NarrowSize = NarrowTy.getSizeInBits();
4735 
4736   // FIXME: add support for when SizeOp0 isn't an exact multiple of
4737   // NarrowSize.
4738   if (SizeOp0 % NarrowSize != 0)
4739     return UnableToLegalize;
4740 
4741   // Expand in terms of carry-setting/consuming G_<Op>E instructions.
4742   int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
4743 
4744   unsigned Opcode = MI.getOpcode();
4745   unsigned OpO, OpE, OpF;
4746   switch (Opcode) {
4747   case TargetOpcode::G_SADDO:
4748   case TargetOpcode::G_SADDE:
4749   case TargetOpcode::G_UADDO:
4750   case TargetOpcode::G_UADDE:
4751   case TargetOpcode::G_ADD:
4752     OpO = TargetOpcode::G_UADDO;
4753     OpE = TargetOpcode::G_UADDE;
4754     OpF = TargetOpcode::G_UADDE;
4755     if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
4756       OpF = TargetOpcode::G_SADDE;
4757     break;
4758   case TargetOpcode::G_SSUBO:
4759   case TargetOpcode::G_SSUBE:
4760   case TargetOpcode::G_USUBO:
4761   case TargetOpcode::G_USUBE:
4762   case TargetOpcode::G_SUB:
4763     OpO = TargetOpcode::G_USUBO;
4764     OpE = TargetOpcode::G_USUBE;
4765     OpF = TargetOpcode::G_USUBE;
4766     if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
4767       OpF = TargetOpcode::G_SSUBE;
4768     break;
4769   default:
4770     llvm_unreachable("Unexpected add/sub opcode!");
4771   }
4772 
4773   // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
4774   unsigned NumDefs = MI.getNumExplicitDefs();
4775   Register Src1 = MI.getOperand(NumDefs).getReg();
4776   Register Src2 = MI.getOperand(NumDefs + 1).getReg();
4777   Register CarryDst;
4778   if (NumDefs == 2)
4779     CarryDst = MI.getOperand(1).getReg();
4780   Register CarryIn;
4781   if (MI.getNumOperands() == NumDefs + 3)
4782     CarryIn = MI.getOperand(NumDefs + 2).getReg();
4783 
4784   SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
4785   extractParts(Src1, NarrowTy, NumParts, Src1Regs);
4786   extractParts(Src2, NarrowTy, NumParts, Src2Regs);
4787 
4788   for (int i = 0; i < NumParts; ++i) {
4789     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
4790     Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
4791     // Forward the final carry-out to the destination register
4792     if (i == NumParts - 1 && CarryDst)
4793       CarryOut = CarryDst;
4794 
4795     if (!CarryIn) {
4796       MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
4797                             {Src1Regs[i], Src2Regs[i]});
4798     } else if (i == NumParts - 1) {
4799       MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
4800                             {Src1Regs[i], Src2Regs[i], CarryIn});
4801     } else {
4802       MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
4803                             {Src1Regs[i], Src2Regs[i], CarryIn});
4804     }
4805 
4806     DstRegs.push_back(DstReg);
4807     CarryIn = CarryOut;
4808   }
4809   MIRBuilder.buildMerge(DstReg, DstRegs);
4810   MI.eraseFromParent();
4811   return Legalized;
4812 }
4813 
4814 LegalizerHelper::LegalizeResult
4815 LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
4816   Register DstReg = MI.getOperand(0).getReg();
4817   Register Src1 = MI.getOperand(1).getReg();
4818   Register Src2 = MI.getOperand(2).getReg();
4819 
4820   LLT Ty = MRI.getType(DstReg);
4821   if (Ty.isVector())
4822     return UnableToLegalize;
4823 
4824   unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
4825   unsigned DstSize = Ty.getSizeInBits();
4826   unsigned NarrowSize = NarrowTy.getSizeInBits();
4827   if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
4828     return UnableToLegalize;
4829 
4830   unsigned NumDstParts = DstSize / NarrowSize;
4831   unsigned NumSrcParts = SrcSize / NarrowSize;
4832   bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
4833   unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);
4834 
4835   SmallVector<Register, 2> Src1Parts, Src2Parts;
4836   SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
4837   extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
4838   extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
4839   multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
4840 
4841   // Take only high half of registers if this is high mul.
4842   ArrayRef<Register> DstRegs(
4843       IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
4844   MIRBuilder.buildMerge(DstReg, DstRegs);
4845   MI.eraseFromParent();
4846   return Legalized;
4847 }
4848 
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  // Narrow the source (operand 1) of a G_EXTRACT: split the wide source into
  // NarrowTy-sized pieces, pull the overlapping bits out of each piece, and
  // remerge the collected segments into the destination.
  if (TypeIdx != 1)
    return UnableToLegalize;

  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
  // FIXME: add support for when SizeOp1 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp1 % NarrowSize != 0)
    return UnableToLegalize;
  int NumParts = SizeOp1 / NarrowSize;

  SmallVector<Register, 2> SrcRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  // OpReg is the original extract's destination; OpStart/OpSize describe the
  // bit range being extracted from the wide source.
  Register OpReg = MI.getOperand(0).getReg();
  uint64_t OpStart = MI.getOperand(2).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int i = 0; i < NumParts; ++i) {
    // Bit offset of this piece within the wide source.
    unsigned SrcStart = i * NarrowSize;

    if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
      // No part of the extract uses this subregister, ignore it.
      continue;
    } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is extracted, forward the value.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset;
    uint64_t SegSize;
    if (OpStart < SrcStart) {
      // The requested range starts before this piece: take from the piece's
      // low end, clamped to where the requested range ends.
      ExtractOffset = 0;
      SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
    } else {
      // The requested range starts inside this piece.
      ExtractOffset = OpStart - SrcStart;
      SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
    }

    Register SegReg = SrcRegs[i];
    if (ExtractOffset != 0 || SegSize != NarrowSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
    }

    DstRegs.push_back(SegReg);
  }

  // Reassemble the collected segments into the destination register.
  Register DstReg = MI.getOperand(0).getReg();
  if (MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else if (DstRegs.size() > 1)
    MIRBuilder.buildMerge(DstReg, DstRegs);
  else
    MIRBuilder.buildCopy(DstReg, DstRegs[0]);
  MI.eraseFromParent();
  return Legalized;
}
4915 
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  // Narrow a G_INSERT: split the wide container (operand 1) into NarrowTy
  // pieces, splice the inserted value (operand 2) into the pieces it
  // overlaps, and remerge everything into the destination.
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  // FIXME: add support for when SizeOp0 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp0 % NarrowSize != 0)
    return UnableToLegalize;

  int NumParts = SizeOp0 / NarrowSize;

  SmallVector<Register, 2> SrcRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  // OpReg is the value being inserted; OpStart/OpSize describe the bit range
  // it occupies within the wide destination.
  Register OpReg = MI.getOperand(2).getReg();
  uint64_t OpStart = MI.getOperand(3).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int i = 0; i < NumParts; ++i) {
    // Bit offset of this piece within the wide destination.
    unsigned DstStart = i * NarrowSize;

    if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
      // No part of the insert affects this subregister, forward the original.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is defined by this insert, forward the new
      // value.
      DstRegs.push_back(OpReg);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset, InsertOffset;
    uint64_t SegSize;
    if (OpStart < DstStart) {
      // The inserted range starts before this piece: insert at the piece's
      // low end from partway through the inserted value.
      InsertOffset = 0;
      ExtractOffset = DstStart - OpStart;
      SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
    } else {
      // The inserted range starts inside this piece: insert from the start of
      // the inserted value at an offset within the piece.
      InsertOffset = OpStart - DstStart;
      ExtractOffset = 0;
      SegSize =
        std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
    }

    Register SegReg = OpReg;
    if (ExtractOffset != 0 || SegSize != OpSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
    }

    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
    DstRegs.push_back(DstReg);
  }

  assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
  // Reassemble the updated pieces into the destination register.
  Register DstReg = MI.getOperand(0).getReg();
  if(MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else
    MIRBuilder.buildMerge(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}
4990 
4991 LegalizerHelper::LegalizeResult
4992 LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
4993                                    LLT NarrowTy) {
4994   Register DstReg = MI.getOperand(0).getReg();
4995   LLT DstTy = MRI.getType(DstReg);
4996 
4997   assert(MI.getNumOperands() == 3 && TypeIdx == 0);
4998 
4999   SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
5000   SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
5001   SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
5002   LLT LeftoverTy;
5003   if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
5004                     Src0Regs, Src0LeftoverRegs))
5005     return UnableToLegalize;
5006 
5007   LLT Unused;
5008   if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
5009                     Src1Regs, Src1LeftoverRegs))
5010     llvm_unreachable("inconsistent extractParts result");
5011 
5012   for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
5013     auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
5014                                         {Src0Regs[I], Src1Regs[I]});
5015     DstRegs.push_back(Inst.getReg(0));
5016   }
5017 
5018   for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
5019     auto Inst = MIRBuilder.buildInstr(
5020       MI.getOpcode(),
5021       {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
5022     DstLeftoverRegs.push_back(Inst.getReg(0));
5023   }
5024 
5025   insertParts(DstReg, DstTy, NarrowTy, DstRegs,
5026               LeftoverTy, DstLeftoverRegs);
5027 
5028   MI.eraseFromParent();
5029   return Legalized;
5030 }
5031 
5032 LegalizerHelper::LegalizeResult
5033 LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
5034                                  LLT NarrowTy) {
5035   if (TypeIdx != 0)
5036     return UnableToLegalize;
5037 
5038   Register DstReg = MI.getOperand(0).getReg();
5039   Register SrcReg = MI.getOperand(1).getReg();
5040 
5041   LLT DstTy = MRI.getType(DstReg);
5042   if (DstTy.isVector())
5043     return UnableToLegalize;
5044 
5045   SmallVector<Register, 8> Parts;
5046   LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
5047   LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
5048   buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
5049 
5050   MI.eraseFromParent();
5051   return Legalized;
5052 }
5053 
5054 LegalizerHelper::LegalizeResult
5055 LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
5056                                     LLT NarrowTy) {
5057   if (TypeIdx != 0)
5058     return UnableToLegalize;
5059 
5060   Register CondReg = MI.getOperand(1).getReg();
5061   LLT CondTy = MRI.getType(CondReg);
5062   if (CondTy.isVector()) // TODO: Handle vselect
5063     return UnableToLegalize;
5064 
5065   Register DstReg = MI.getOperand(0).getReg();
5066   LLT DstTy = MRI.getType(DstReg);
5067 
5068   SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
5069   SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
5070   SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
5071   LLT LeftoverTy;
5072   if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
5073                     Src1Regs, Src1LeftoverRegs))
5074     return UnableToLegalize;
5075 
5076   LLT Unused;
5077   if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
5078                     Src2Regs, Src2LeftoverRegs))
5079     llvm_unreachable("inconsistent extractParts result");
5080 
5081   for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
5082     auto Select = MIRBuilder.buildSelect(NarrowTy,
5083                                          CondReg, Src1Regs[I], Src2Regs[I]);
5084     DstRegs.push_back(Select.getReg(0));
5085   }
5086 
5087   for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
5088     auto Select = MIRBuilder.buildSelect(
5089       LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
5090     DstLeftoverRegs.push_back(Select.getReg(0));
5091   }
5092 
5093   insertParts(DstReg, DstTy, NarrowTy, DstRegs,
5094               LeftoverTy, DstLeftoverRegs);
5095 
5096   MI.eraseFromParent();
5097   return Legalized;
5098 }
5099 
5100 LegalizerHelper::LegalizeResult
5101 LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
5102                                   LLT NarrowTy) {
5103   if (TypeIdx != 1)
5104     return UnableToLegalize;
5105 
5106   Register DstReg = MI.getOperand(0).getReg();
5107   Register SrcReg = MI.getOperand(1).getReg();
5108   LLT DstTy = MRI.getType(DstReg);
5109   LLT SrcTy = MRI.getType(SrcReg);
5110   unsigned NarrowSize = NarrowTy.getSizeInBits();
5111 
5112   if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
5113     const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
5114 
5115     MachineIRBuilder &B = MIRBuilder;
5116     auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
5117     // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
5118     auto C_0 = B.buildConstant(NarrowTy, 0);
5119     auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
5120                                 UnmergeSrc.getReg(1), C_0);
5121     auto LoCTLZ = IsUndef ?
5122       B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
5123       B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
5124     auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
5125     auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
5126     auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
5127     B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
5128 
5129     MI.eraseFromParent();
5130     return Legalized;
5131   }
5132 
5133   return UnableToLegalize;
5134 }
5135 
5136 LegalizerHelper::LegalizeResult
5137 LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
5138                                   LLT NarrowTy) {
5139   if (TypeIdx != 1)
5140     return UnableToLegalize;
5141 
5142   Register DstReg = MI.getOperand(0).getReg();
5143   Register SrcReg = MI.getOperand(1).getReg();
5144   LLT DstTy = MRI.getType(DstReg);
5145   LLT SrcTy = MRI.getType(SrcReg);
5146   unsigned NarrowSize = NarrowTy.getSizeInBits();
5147 
5148   if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
5149     const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
5150 
5151     MachineIRBuilder &B = MIRBuilder;
5152     auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
5153     // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
5154     auto C_0 = B.buildConstant(NarrowTy, 0);
5155     auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
5156                                 UnmergeSrc.getReg(0), C_0);
5157     auto HiCTTZ = IsUndef ?
5158       B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
5159       B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
5160     auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
5161     auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
5162     auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
5163     B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
5164 
5165     MI.eraseFromParent();
5166     return Legalized;
5167   }
5168 
5169   return UnableToLegalize;
5170 }
5171 
5172 LegalizerHelper::LegalizeResult
5173 LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
5174                                    LLT NarrowTy) {
5175   if (TypeIdx != 1)
5176     return UnableToLegalize;
5177 
5178   Register DstReg = MI.getOperand(0).getReg();
5179   LLT DstTy = MRI.getType(DstReg);
5180   LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
5181   unsigned NarrowSize = NarrowTy.getSizeInBits();
5182 
5183   if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
5184     auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
5185 
5186     auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
5187     auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
5188     MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
5189 
5190     MI.eraseFromParent();
5191     return Legalized;
5192   }
5193 
5194   return UnableToLegalize;
5195 }
5196 
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI) {
  // Lower the bit-counting generic opcodes (G_CTLZ/G_CTTZ/G_CTPOP and their
  // zero-undef variants) into sequences of simpler operations.
  unsigned Opc = MI.getOpcode();
  const auto &TII = MIRBuilder.getTII();
  // An opcode is usable as a lowering target if the legalizer can handle it
  // directly (legal), via libcall, or via custom target code.
  auto isSupported = [this](const LegalityQuery &Q) {
    auto QAction = LI.getAction(Q).Action;
    return QAction == Legal || QAction == Libcall || QAction == Custom;
  };
  switch (Opc) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // This trivially expands to CTLZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTLZ: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    LLT SrcTy = MRI.getType(SrcReg);
    unsigned Len = SrcTy.getSizeInBits();

    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
      // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero:
      // ctlz(x) == x == 0 ? Len : ctlz_zero_undef(x).
      auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
      auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
      auto ICmp = MIRBuilder.buildICmp(
          CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
      auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
      MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we do this:
    // NewLen = NextPowerOf2(Len);
    // x = x | (x >> 1);
    // x = x | (x >> 2);
    // ...
    // x = x | (x >>16);
    // x = x | (x >>32); // for 64-bit input
    // Upto NewLen/2
    // return Len - popcount(x);
    //
    // Ref: "Hacker's Delight" by Henry Warren
    // The or-shift cascade smears the highest set bit into every lower
    // position, so popcount of the result counts the non-leading-zero bits.
    Register Op = SrcReg;
    unsigned NewLen = PowerOf2Ceil(Len);
    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
      auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
      auto MIBOp = MIRBuilder.buildOr(
          SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
      Op = MIBOp.getReg(0);
    }
    auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
    MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
                        MIBPop);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    // This trivially expands to CTTZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTTZ: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    LLT SrcTy = MRI.getType(SrcReg);

    unsigned Len = SrcTy.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
      // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
      // zero.
      // NOTE(review): the boolean type here is derived from DstTy, while the
      // G_CTLZ case above derives it from SrcTy; these only agree when Dst
      // and Src have the same shape — confirm intended.
      auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
      auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
      auto ICmp = MIRBuilder.buildICmp(
          CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
      auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
      MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we use: { return popcount(~x & (x - 1)); }
    // unless the target has ctlz but not ctpop, in which case we use:
    // { return 32 - nlz(~x & (x-1)); }
    // Ref: "Hacker's Delight" by Henry Warren
    // ~x & (x - 1) produces a mask of the trailing zero bits of x.
    auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
    auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
    auto MIBTmp = MIRBuilder.buildAnd(
        SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
    if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
        isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
      auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
      MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
                          MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
      MI.eraseFromParent();
      return Legalized;
    }
    // Reuse the original instruction as a CTPOP of the trailing-zero mask.
    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
    MI.getOperand(1).setReg(MIBTmp.getReg(0));
    return Legalized;
  }
  case TargetOpcode::G_CTPOP: {
    // Classic SWAR popcount: count bits in 2-bit, then 4-bit, then 8-bit
    // blocks, and finally sum the per-byte counts with a multiply.
    Register SrcReg = MI.getOperand(1).getReg();
    LLT Ty = MRI.getType(SrcReg);
    unsigned Size = Ty.getSizeInBits();
    MachineIRBuilder &B = MIRBuilder;

    // Count set bits in blocks of 2 bits. Default approach would be
    // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
    // We use following formula instead:
    // B2Count = val - { (val >> 1) & 0x55555555 }
    // since it gives same result in blocks of 2 with one instruction less.
    auto C_1 = B.buildConstant(Ty, 1);
    auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
    APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
    auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
    auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
    auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);

    // In order to get count in blocks of 4 add values from adjacent block of 2.
    // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
    auto C_2 = B.buildConstant(Ty, 2);
    auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
    APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
    auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
    auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
    auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
    auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);

    // For count in blocks of 8 bits we don't have to mask high 4 bits before
    // addition since count value sits in range {0,...,8} and 4 bits are enough
    // to hold such binary values. After addition high 4 bits still hold count
    // of set bits in high 4 bit block, set them to zero and get 8 bit result.
    // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
    auto C_4 = B.buildConstant(Ty, 4);
    auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
    auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
    APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
    auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
    auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);

    assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
    // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
    // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
    auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
    auto ResTmp = B.buildMul(Ty, B8Count, MulMask);

    // Shift count result from 8 high bits to low bits.
    auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
    B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);

    MI.eraseFromParent();
    return Legalized;
  }
  }
}
5359 
5360 // Check that (every element of) Reg is undef or not an exact multiple of BW.
5361 static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
5362                                         Register Reg, unsigned BW) {
5363   return matchUnaryPredicate(
5364       MRI, Reg,
5365       [=](const Constant *C) {
5366         // Null constant here means an undef.
5367         const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
5368         return !CI || CI->getValue().urem(BW) != 0;
5369       },
5370       /*AllowUndefs*/ true);
5371 }
5372 
5373 LegalizerHelper::LegalizeResult
5374 LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
5375   Register Dst = MI.getOperand(0).getReg();
5376   Register X = MI.getOperand(1).getReg();
5377   Register Y = MI.getOperand(2).getReg();
5378   Register Z = MI.getOperand(3).getReg();
5379   LLT Ty = MRI.getType(Dst);
5380   LLT ShTy = MRI.getType(Z);
5381 
5382   unsigned BW = Ty.getScalarSizeInBits();
5383   const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
5384   unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
5385 
5386   if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
5387     // fshl X, Y, Z -> fshr X, Y, -Z
5388     // fshr X, Y, Z -> fshl X, Y, -Z
5389     auto Zero = MIRBuilder.buildConstant(ShTy, 0);
5390     Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
5391   } else {
5392     // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
5393     // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
5394     auto One = MIRBuilder.buildConstant(ShTy, 1);
5395     if (IsFSHL) {
5396       Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
5397       X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
5398     } else {
5399       X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
5400       Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
5401     }
5402 
5403     Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
5404   }
5405 
5406   MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
5407   MI.eraseFromParent();
5408   return Legalized;
5409 }
5410 
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
  // Expand G_FSHL/G_FSHR into two ordinary shifts plus an OR, taking care
  // never to emit a shift by the full bit width (which would be undefined).
  Register Dst = MI.getOperand(0).getReg();
  Register X = MI.getOperand(1).getReg();
  Register Y = MI.getOperand(2).getReg();
  Register Z = MI.getOperand(3).getReg();
  LLT Ty = MRI.getType(Dst);
  LLT ShTy = MRI.getType(Z);

  const unsigned BW = Ty.getScalarSizeInBits();
  const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;

  Register ShX, ShY;
  Register ShAmt, InvShAmt;

  // FIXME: Emit optimized urem by constant instead of letting it expand later.
  if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
    // Since C != 0, both C and BW - C are in [1, BW-1], so no shift can be by
    // the full width.
    auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
    ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
    InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
    ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
    ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
  } else {
    // Z % BW may be zero, so split the second shift into a fixed shift by one
    // followed by a shift by BW - 1 - (Z % BW), both always in range:
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
    if (isPowerOf2_32(BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      auto NotZ = MIRBuilder.buildNot(ShTy, Z);
      InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
    } else {
      // Non-power-of-two width: fall back to an actual urem.
      auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
      ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
      InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
    }

    auto One = MIRBuilder.buildConstant(ShTy, 1);
    if (IsFSHL) {
      ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
      auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
      ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
    } else {
      auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
      ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
      ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
    }
  }

  // Combine the two shifted halves into the funnel-shift result.
  MIRBuilder.buildOr(Dst, ShX, ShY);
  MI.eraseFromParent();
  return Legalized;
}
5468 
5469 LegalizerHelper::LegalizeResult
5470 LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
5471   // These operations approximately do the following (while avoiding undefined
5472   // shifts by BW):
5473   // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
5474   // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
5475   Register Dst = MI.getOperand(0).getReg();
5476   LLT Ty = MRI.getType(Dst);
5477   LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
5478 
5479   bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
5480   unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
5481   if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
5482     return lowerFunnelShiftAsShifts(MI);
5483   return lowerFunnelShiftWithInverse(MI);
5484 }
5485 
5486 LegalizerHelper::LegalizeResult
5487 LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
5488   Register Dst = MI.getOperand(0).getReg();
5489   Register Src = MI.getOperand(1).getReg();
5490   Register Amt = MI.getOperand(2).getReg();
5491   LLT AmtTy = MRI.getType(Amt);
5492   auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
5493   bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
5494   unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
5495   auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
5496   MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
5497   MI.eraseFromParent();
5498   return Legalized;
5499 }
5500 
5501 LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
5502   Register Dst = MI.getOperand(0).getReg();
5503   Register Src = MI.getOperand(1).getReg();
5504   Register Amt = MI.getOperand(2).getReg();
5505   LLT DstTy = MRI.getType(Dst);
5506   LLT SrcTy = MRI.getType(Dst);
5507   LLT AmtTy = MRI.getType(Amt);
5508 
5509   unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
5510   bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
5511 
5512   MIRBuilder.setInstrAndDebugLoc(MI);
5513 
5514   // If a rotate in the other direction is supported, use it.
5515   unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
5516   if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
5517       isPowerOf2_32(EltSizeInBits))
5518     return lowerRotateWithReverseRotate(MI);
5519 
5520   auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
5521   unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
5522   unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
5523   auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
5524   Register ShVal;
5525   Register RevShiftVal;
5526   if (isPowerOf2_32(EltSizeInBits)) {
5527     // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
5528     // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
5529     auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
5530     auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
5531     ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
5532     auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
5533     RevShiftVal =
5534         MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
5535   } else {
5536     // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
5537     // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
5538     auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
5539     auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
5540     ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
5541     auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
5542     auto One = MIRBuilder.buildConstant(AmtTy, 1);
5543     auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
5544     RevShiftVal =
5545         MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
5546   }
5547   MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
5548   MI.eraseFromParent();
5549   return Legalized;
5550 }
5551 
// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
// representation.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);

  // The expansion below follows this reference implementation line by line:
  // unsigned cul2f(ulong u) {
  //   uint lz = clz(u);
  //   uint e = (u != 0) ? 127U + 63U - lz : 0;
  //   u = (u << lz) & 0x7fffffffffffffffUL;
  //   ulong t = u & 0xffffffffffUL;
  //   uint v = (e << 23) | (uint)(u >> 40);
  //   uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
  //   return as_float(v + r);
  // }

  auto Zero32 = MIRBuilder.buildConstant(S32, 0);
  auto Zero64 = MIRBuilder.buildConstant(S64, 0);

  // lz = clz(u). The zero-input case is fixed up by the select below, so the
  // ZERO_UNDEF variant is safe here.
  auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);

  // e = 127 + 63 - lz: the biased f32 exponent of the leading set bit.
  auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
  auto Sub = MIRBuilder.buildSub(S32, K, LZ);

  // e = (u != 0) ? e : 0, so a zero input yields an all-zero result.
  auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
  auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);

  // u = (u << lz) & 0x7fffffffffffffff: normalize so the leading bit sits at
  // position 63, then strip it (it becomes the implicit mantissa bit).
  auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
  auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);

  auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);

  // t = u & 0xffffffffff: the 40 bits that won't fit in the f32 mantissa.
  auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
  auto T = MIRBuilder.buildAnd(S64, U, Mask1);

  // v = (e << 23) | (uint)(u >> 40): assemble exponent and mantissa fields.
  auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
  auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
  auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));

  // r: round-to-nearest increment. Above the halfway point (t > 2^39) round
  // up; exactly halfway rounds to even (v & 1); otherwise round down.
  auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
  auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
  auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
  auto One = MIRBuilder.buildConstant(S32, 1);

  auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
  auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
  auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
  // as_float(v + r): the s32 destination holds the f32 bit pattern.
  MIRBuilder.buildAdd(Dst, V, R);

  MI.eraseFromParent();
  return Legalized;
}
5610 
5611 LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
5612   Register Dst = MI.getOperand(0).getReg();
5613   Register Src = MI.getOperand(1).getReg();
5614   LLT DstTy = MRI.getType(Dst);
5615   LLT SrcTy = MRI.getType(Src);
5616 
5617   if (SrcTy == LLT::scalar(1)) {
5618     auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
5619     auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
5620     MIRBuilder.buildSelect(Dst, Src, True, False);
5621     MI.eraseFromParent();
5622     return Legalized;
5623   }
5624 
5625   if (SrcTy != LLT::scalar(64))
5626     return UnableToLegalize;
5627 
5628   if (DstTy == LLT::scalar(32)) {
5629     // TODO: SelectionDAG has several alternative expansions to port which may
5630     // be more reasonble depending on the available instructions. If a target
5631     // has sitofp, does not have CTLZ, or can efficiently use f64 as an
5632     // intermediate type, this is probably worse.
5633     return lowerU64ToF32BitOps(MI);
5634   }
5635 
5636   return UnableToLegalize;
5637 }
5638 
5639 LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
5640   Register Dst = MI.getOperand(0).getReg();
5641   Register Src = MI.getOperand(1).getReg();
5642   LLT DstTy = MRI.getType(Dst);
5643   LLT SrcTy = MRI.getType(Src);
5644 
5645   const LLT S64 = LLT::scalar(64);
5646   const LLT S32 = LLT::scalar(32);
5647   const LLT S1 = LLT::scalar(1);
5648 
5649   if (SrcTy == S1) {
5650     auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
5651     auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
5652     MIRBuilder.buildSelect(Dst, Src, True, False);
5653     MI.eraseFromParent();
5654     return Legalized;
5655   }
5656 
5657   if (SrcTy != S64)
5658     return UnableToLegalize;
5659 
5660   if (DstTy == S32) {
5661     // signed cl2f(long l) {
5662     //   long s = l >> 63;
5663     //   float r = cul2f((l + s) ^ s);
5664     //   return s ? -r : r;
5665     // }
5666     Register L = Src;
5667     auto SignBit = MIRBuilder.buildConstant(S64, 63);
5668     auto S = MIRBuilder.buildAShr(S64, L, SignBit);
5669 
5670     auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
5671     auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
5672     auto R = MIRBuilder.buildUITOFP(S32, Xor);
5673 
5674     auto RNeg = MIRBuilder.buildFNeg(S32, R);
5675     auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
5676                                             MIRBuilder.buildConstant(S64, 0));
5677     MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
5678     MI.eraseFromParent();
5679     return Legalized;
5680   }
5681 
5682   return UnableToLegalize;
5683 }
5684 
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  // Only s32/s64 sources and s32/s64 results are handled.
  if (SrcTy != S64 && SrcTy != S32)
    return UnableToLegalize;
  if (DstTy != S32 && DstTy != S64)
    return UnableToLegalize;

  // FPTOSI gives same result as FPTOUI for positive signed integers.
  // FPTOUI needs to deal with fp values that convert to unsigned integers
  // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.

  // 2^Exp as an integer: the sign-bit mask of the destination width.
  APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
  // The same value as a float constant in the source's format.
  APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
                                                : APFloat::IEEEdouble(),
                    APInt::getNullValue(SrcTy.getSizeInBits()));
  TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);

  // Result used when the input is below the threshold.
  MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);

  MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
  // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
  // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
  MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
  MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
  MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
  MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);

  const LLT S1 = LLT::scalar(1);

  // ULT (unordered-or-less-than) is true for nan inputs, so those take the
  // plain FPTOSI path rather than the adjusted one.
  MachineInstrBuilder FCMP =
      MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
  MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);

  MI.eraseFromParent();
  return Legalized;
}
5727 
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
    return UnableToLegalize;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c

  unsigned SrcEltBits = SrcTy.getScalarSizeInBits();

  // Extract the biased exponent field (bits [30:23] of the f32 pattern).
  auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
  auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);

  auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
  auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);

  // Smear the sign bit across the whole value: 0 for positive inputs,
  // all-ones for negative, then widen to the destination type.
  auto SignMask = MIRBuilder.buildConstant(SrcTy,
                                           APInt::getSignMask(SrcEltBits));
  auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
  auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
  auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
  Sign = MIRBuilder.buildSExt(DstTy, Sign);

  // Reconstruct the full significand: mantissa bits plus the implicit
  // leading one (0x00800000).
  auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
  auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
  auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);

  auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
  R = MIRBuilder.buildZExt(DstTy, R);

  // Unbias the exponent (f32 bias is 127) and compute both possible shift
  // amounts relative to the mantissa width (23).
  auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
  auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
  auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
  auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);

  // Shift the significand left or right depending on whether the exponent
  // exceeds the mantissa width.
  auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
  auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);

  const LLT S1 = LLT::scalar(1);
  auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
                                    S1, Exponent, ExponentLoBit);

  R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);

  // Apply the sign: (r ^ s) - s negates r when s is all-ones, and is a
  // no-op when s is zero.
  auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
  auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);

  auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);

  // Inputs with a negative unbiased exponent have magnitude < 1 and
  // truncate to zero.
  auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
                                          S1, Exponent, ZeroSrcTy);

  auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
  MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);

  MI.eraseFromParent();
  return Legalized;
}
5794 
// f64 -> f16 conversion using round-to-nearest-even rounding mode.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();

  if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
    return UnableToLegalize;

  const unsigned ExpMask = 0x7ff;
  const unsigned ExpBiasf64 = 1023;
  const unsigned ExpBiasf16 = 15;
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  // Split the f64 bit pattern into its low (U) and high (UH) 32-bit halves.
  auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
  Register U = Unmerge.getReg(0);
  Register UH = Unmerge.getReg(1);

  // E = the 11-bit f64 exponent field, taken from bits [30:20] of UH.
  auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
  E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));

  // Subtract the fp64 exponent bias (1023) to get the real exponent and
  // add the f16 bias (15) to get the biased exponent for the f16 format.
  E = MIRBuilder.buildAdd(
    S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));

  // M = the top mantissa bits from UH, shifted down into position.
  auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
  M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));

  // Collapse the remaining low mantissa bits (UH & 0x1ff and the whole low
  // half) into a single sticky bit OR'd into M.
  auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
                                       MIRBuilder.buildConstant(S32, 0x1ff));
  MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);

  auto Zero = MIRBuilder.buildConstant(S32, 0);
  auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
  auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
  M = MIRBuilder.buildOr(S32, M, Lo40Set);

  // I = result pattern for inf/nan inputs:
  // (M != 0 ? 0x0200 : 0) | 0x7c00;
  auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
  auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
  auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);

  auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
  auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);

  // N = M | (E << 12);  the normal-range encoding before rounding.
  auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
  auto N = MIRBuilder.buildOr(S32, M, EShl12);

  // B = clamp(1-E, 0, 13);  extra right-shift needed for a denormal result.
  auto One = MIRBuilder.buildConstant(S32, 1);
  auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
  auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
  B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));

  // Denormal path: make the implicit leading bit explicit (0x1000), ...
  auto SigSetHigh = MIRBuilder.buildOr(S32, M,
                                       MIRBuilder.buildConstant(S32, 0x1000));

  // ... shift right by B, and fold any bits lost in the shift back in as a
  // sticky bit (D0 != SigSetHigh detects the loss).
  auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
  auto D0 = MIRBuilder.buildShl(S32, D, B);

  auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
                                             D0, SigSetHigh);
  auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
  D = MIRBuilder.buildOr(S32, D, D1);

  // Pick the denormal encoding when E < 1, the normal one otherwise.
  auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
  auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);

  // Round to nearest even: drop the two low rounding bits and increment when
  // low bits == 3 (tie, odd result) or > 5 (above halfway).
  auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
  V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));

  auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
                                       MIRBuilder.buildConstant(S32, 3));
  auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);

  auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
                                       MIRBuilder.buildConstant(S32, 5));
  auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);

  V1 = MIRBuilder.buildOr(S32, V0, V1);
  V = MIRBuilder.buildAdd(S32, V, V1);

  // Rebiased exponents above 30 overflow f16's range; clamp to infinity.
  auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,  S1,
                                       E, MIRBuilder.buildConstant(S32, 30));
  V = MIRBuilder.buildSelect(S32, CmpEGt30,
                             MIRBuilder.buildConstant(S32, 0x7c00), V);

  // E == 1039 (0x7ff - 1023 + 15) means the input was inf/nan; use I.
  auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
                                         E, MIRBuilder.buildConstant(S32, 1039));
  V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);

  // Extract the sign bit.
  auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
  Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));

  // Insert the sign bit
  V = MIRBuilder.buildOr(S32, Sign, V);

  MIRBuilder.buildTrunc(Dst, V);
  MI.eraseFromParent();
  return Legalized;
}
5900 
5901 LegalizerHelper::LegalizeResult
5902 LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
5903   Register Dst = MI.getOperand(0).getReg();
5904   Register Src = MI.getOperand(1).getReg();
5905 
5906   LLT DstTy = MRI.getType(Dst);
5907   LLT SrcTy = MRI.getType(Src);
5908   const LLT S64 = LLT::scalar(64);
5909   const LLT S16 = LLT::scalar(16);
5910 
5911   if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
5912     return lowerFPTRUNC_F64_TO_F16(MI);
5913 
5914   return UnableToLegalize;
5915 }
5916 
5917 // TODO: If RHS is a constant SelectionDAGBuilder expands this into a
5918 // multiplication tree.
5919 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
5920   Register Dst = MI.getOperand(0).getReg();
5921   Register Src0 = MI.getOperand(1).getReg();
5922   Register Src1 = MI.getOperand(2).getReg();
5923   LLT Ty = MRI.getType(Dst);
5924 
5925   auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
5926   MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
5927   MI.eraseFromParent();
5928   return Legalized;
5929 }
5930 
5931 static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
5932   switch (Opc) {
5933   case TargetOpcode::G_SMIN:
5934     return CmpInst::ICMP_SLT;
5935   case TargetOpcode::G_SMAX:
5936     return CmpInst::ICMP_SGT;
5937   case TargetOpcode::G_UMIN:
5938     return CmpInst::ICMP_ULT;
5939   case TargetOpcode::G_UMAX:
5940     return CmpInst::ICMP_UGT;
5941   default:
5942     llvm_unreachable("not in integer min/max");
5943   }
5944 }
5945 
5946 LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
5947   Register Dst = MI.getOperand(0).getReg();
5948   Register Src0 = MI.getOperand(1).getReg();
5949   Register Src1 = MI.getOperand(2).getReg();
5950 
5951   const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
5952   LLT CmpType = MRI.getType(Dst).changeElementSize(1);
5953 
5954   auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
5955   MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
5956 
5957   MI.eraseFromParent();
5958   return Legalized;
5959 }
5960 
5961 LegalizerHelper::LegalizeResult
5962 LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
5963   Register Dst = MI.getOperand(0).getReg();
5964   Register Src0 = MI.getOperand(1).getReg();
5965   Register Src1 = MI.getOperand(2).getReg();
5966 
5967   const LLT Src0Ty = MRI.getType(Src0);
5968   const LLT Src1Ty = MRI.getType(Src1);
5969 
5970   const int Src0Size = Src0Ty.getScalarSizeInBits();
5971   const int Src1Size = Src1Ty.getScalarSizeInBits();
5972 
5973   auto SignBitMask = MIRBuilder.buildConstant(
5974     Src0Ty, APInt::getSignMask(Src0Size));
5975 
5976   auto NotSignBitMask = MIRBuilder.buildConstant(
5977     Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
5978 
5979   Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
5980   Register And1;
5981   if (Src0Ty == Src1Ty) {
5982     And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
5983   } else if (Src0Size > Src1Size) {
5984     auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
5985     auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
5986     auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
5987     And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
5988   } else {
5989     auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
5990     auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
5991     auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
5992     And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
5993   }
5994 
5995   // Be careful about setting nsz/nnan/ninf on every instruction, since the
5996   // constants are a nan and -0.0, but the final result should preserve
5997   // everything.
5998   unsigned Flags = MI.getFlags();
5999   MIRBuilder.buildOr(Dst, And0, And1, Flags);
6000 
6001   MI.eraseFromParent();
6002   return Legalized;
6003 }
6004 
6005 LegalizerHelper::LegalizeResult
6006 LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
6007   unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
6008     TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
6009 
6010   Register Dst = MI.getOperand(0).getReg();
6011   Register Src0 = MI.getOperand(1).getReg();
6012   Register Src1 = MI.getOperand(2).getReg();
6013   LLT Ty = MRI.getType(Dst);
6014 
6015   if (!MI.getFlag(MachineInstr::FmNoNans)) {
6016     // Insert canonicalizes if it's possible we need to quiet to get correct
6017     // sNaN behavior.
6018 
6019     // Note this must be done here, and not as an optimization combine in the
6020     // absence of a dedicate quiet-snan instruction as we're using an
6021     // omni-purpose G_FCANONICALIZE.
6022     if (!isKnownNeverSNaN(Src0, MRI))
6023       Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
6024 
6025     if (!isKnownNeverSNaN(Src1, MRI))
6026       Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
6027   }
6028 
6029   // If there are no nans, it's safe to simply replace this with the non-IEEE
6030   // version.
6031   MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
6032   MI.eraseFromParent();
6033   return Legalized;
6034 }
6035 
6036 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
6037   // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
6038   Register DstReg = MI.getOperand(0).getReg();
6039   LLT Ty = MRI.getType(DstReg);
6040   unsigned Flags = MI.getFlags();
6041 
6042   auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
6043                                   Flags);
6044   MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
6045   MI.eraseFromParent();
6046   return Legalized;
6047 }
6048 
6049 LegalizerHelper::LegalizeResult
6050 LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
6051   Register DstReg = MI.getOperand(0).getReg();
6052   Register X = MI.getOperand(1).getReg();
6053   const unsigned Flags = MI.getFlags();
6054   const LLT Ty = MRI.getType(DstReg);
6055   const LLT CondTy = Ty.changeElementSize(1);
6056 
6057   // round(x) =>
6058   //  t = trunc(x);
6059   //  d = fabs(x - t);
6060   //  o = copysign(1.0f, x);
6061   //  return t + (d >= 0.5 ? o : 0.0);
6062 
6063   auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
6064 
6065   auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
6066   auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
6067   auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
6068   auto One = MIRBuilder.buildFConstant(Ty, 1.0);
6069   auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
6070   auto SignOne = MIRBuilder.buildFCopysign(Ty, One, X);
6071 
6072   auto Cmp = MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half,
6073                                   Flags);
6074   auto Sel = MIRBuilder.buildSelect(Ty, Cmp, SignOne, Zero, Flags);
6075 
6076   MIRBuilder.buildFAdd(DstReg, T, Sel, Flags);
6077 
6078   MI.eraseFromParent();
6079   return Legalized;
6080 }
6081 
6082 LegalizerHelper::LegalizeResult
6083 LegalizerHelper::lowerFFloor(MachineInstr &MI) {
6084   Register DstReg = MI.getOperand(0).getReg();
6085   Register SrcReg = MI.getOperand(1).getReg();
6086   unsigned Flags = MI.getFlags();
6087   LLT Ty = MRI.getType(DstReg);
6088   const LLT CondTy = Ty.changeElementSize(1);
6089 
6090   // result = trunc(src);
6091   // if (src < 0.0 && src != result)
6092   //   result += -1.0.
6093 
6094   auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
6095   auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
6096 
6097   auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
6098                                   SrcReg, Zero, Flags);
6099   auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
6100                                       SrcReg, Trunc, Flags);
6101   auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
6102   auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
6103 
6104   MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
6105   MI.eraseFromParent();
6106   return Legalized;
6107 }
6108 
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
  // Lower G_MERGE_VALUES by zero-extending each source into a wide scalar,
  // shifting it to its bit offset, and OR-ing the pieces together.
  const unsigned NumOps = MI.getNumOperands();
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(Src0Reg);
  unsigned PartSize = SrcTy.getSizeInBits();

  // Accumulate into a scalar of the full destination width; part 0 sits at
  // offset 0 and needs no shift.
  LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
  Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);

  for (unsigned I = 2; I != NumOps; ++I) {
    const unsigned Offset = (I - 1) * PartSize;

    Register SrcReg = MI.getOperand(I).getReg();
    auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);

    // On the last part, OR directly into DstReg when no type cast is needed
    // (i.e. the destination is already the wide scalar type).
    Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
      MRI.createGenericVirtualRegister(WideTy);

    auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
    auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
    MIRBuilder.buildOr(NextResult, ResultReg, Shl);
    ResultReg = NextResult;
  }

  if (DstTy.isPointer()) {
    // Non-integral address spaces cannot be reconstructed from an integer.
    if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
          DstTy.getAddressSpace())) {
      LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
      return UnableToLegalize;
    }

    // Pointer results need an explicit int-to-ptr cast of the accumulator.
    MIRBuilder.buildIntToPtr(DstReg, ResultReg);
  }

  MI.eraseFromParent();
  return Legalized;
}
6149 
6150 LegalizerHelper::LegalizeResult
6151 LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
6152   const unsigned NumDst = MI.getNumOperands() - 1;
6153   Register SrcReg = MI.getOperand(NumDst).getReg();
6154   Register Dst0Reg = MI.getOperand(0).getReg();
6155   LLT DstTy = MRI.getType(Dst0Reg);
6156   if (DstTy.isPointer())
6157     return UnableToLegalize; // TODO
6158 
6159   SrcReg = coerceToScalar(SrcReg);
6160   if (!SrcReg)
6161     return UnableToLegalize;
6162 
6163   // Expand scalarizing unmerge as bitcast to integer and shift.
6164   LLT IntTy = MRI.getType(SrcReg);
6165 
6166   MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
6167 
6168   const unsigned DstSize = DstTy.getSizeInBits();
6169   unsigned Offset = DstSize;
6170   for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
6171     auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
6172     auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
6173     MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
6174   }
6175 
6176   MI.eraseFromParent();
6177   return Legalized;
6178 }
6179 
/// Lower a vector extract or insert by writing the vector to a stack temporary
/// and reloading the element or vector.
///
/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
///  =>
///  %stack_temp = G_FRAME_INDEX
///  G_STORE %vec, %stack_temp
///  %idx = clamp(%idx, %vec.getNumElements())
///  %element_ptr = G_PTR_ADD %stack_temp, %idx
///  %dst = G_LOAD %element_ptr
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  // Only G_INSERT_VECTOR_ELT carries a value operand; for extracts InsertVal
  // stays invalid and distinguishes the two paths below.
  Register InsertVal;
  if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
    InsertVal = MI.getOperand(2).getReg();

  // The index is the last operand for both opcodes.
  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();

  LLT VecTy = MRI.getType(SrcVec);
  LLT EltTy = VecTy.getElementType();
  if (!EltTy.isByteSized()) { // Not implemented.
    LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
    return UnableToLegalize;
  }

  unsigned EltBytes = EltTy.getSizeInBytes();
  Align VecAlign = getStackTemporaryAlignment(VecTy);
  Align EltAlign;

  // Spill the whole vector to a stack slot.
  MachinePointerInfo PtrInfo;
  auto StackTemp = createStackTemporary(TypeSize::Fixed(VecTy.getSizeInBytes()),
                                        VecAlign, PtrInfo);
  MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);

  // Get the pointer to the element, and be sure not to hit undefined behavior
  // if the index is out of bounds.
  Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);

  int64_t IdxVal;
  if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
    // Constant index: the exact offset and a tighter alignment are known.
    int64_t Offset = IdxVal * EltBytes;
    PtrInfo = PtrInfo.getWithOffset(Offset);
    EltAlign = commonAlignment(VecAlign, Offset);
  } else {
    // We lose information with a variable offset.
    EltAlign = getStackTemporaryAlignment(EltTy);
    PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
  }

  if (InsertVal) {
    // Write the inserted element
    MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);

    // Reload the whole vector.
    // NOTE(review): PtrInfo was rewritten above to describe the element
    // location, but this load reads from the vector base — confirm the
    // element-offset MMO on the full-vector reload is intended.
    MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
  } else {
    MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
  }

  MI.eraseFromParent();
  return Legalized;
}
6244 
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
  // Lower G_SHUFFLE_VECTOR into per-lane extracts feeding a G_BUILD_VECTOR.
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(1).getReg();
  Register Src1Reg = MI.getOperand(2).getReg();
  LLT Src0Ty = MRI.getType(Src0Reg);
  LLT DstTy = MRI.getType(DstReg);
  LLT IdxTy = LLT::scalar(32);

  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();

  if (DstTy.isScalar()) {
    if (Src0Ty.isVector())
      return UnableToLegalize;

    // This is just a SELECT.
    assert(Mask.size() == 1 && "Expected a single mask element");
    Register Val;
    if (Mask[0] < 0 || Mask[0] > 1)
      Val = MIRBuilder.buildUndef(DstTy).getReg(0);
    else
      Val = Mask[0] == 0 ? Src0Reg : Src1Reg;
    MIRBuilder.buildCopy(DstReg, Val);
    MI.eraseFromParent();
    return Legalized;
  }

  // A single lazily-created undef element is shared by all undef (< 0) lanes.
  Register Undef;
  SmallVector<Register, 32> BuildVec;
  LLT EltTy = DstTy.getElementType();

  for (int Idx : Mask) {
    if (Idx < 0) {
      if (!Undef.isValid())
        Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
      BuildVec.push_back(Undef);
      continue;
    }

    if (Src0Ty.isScalar()) {
      // Scalar sources: index 0 selects the first source, anything else the
      // second.
      BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
    } else {
      // Mask indices >= NumElts address lanes of the second source vector.
      int NumElts = Src0Ty.getNumElements();
      Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
      int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
      auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
      auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
      BuildVec.push_back(Extract.getReg(0));
    }
  }

  MIRBuilder.buildBuildVector(DstReg, BuildVec);
  MI.eraseFromParent();
  return Legalized;
}
6300 
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
  const auto &MF = *MI.getMF();
  const auto &TFI = *MF.getSubtarget().getFrameLowering();
  // The lowering below decrements SP, so it is only correct on targets whose
  // stack grows towards lower addresses.
  if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
    return UnableToLegalize;

  Register Dst = MI.getOperand(0).getReg();
  Register AllocSize = MI.getOperand(1).getReg();
  Align Alignment = assumeAligned(MI.getOperand(2).getImm());

  LLT PtrTy = MRI.getType(Dst);
  // Integer type of the same width as the stack pointer, used for the SP
  // arithmetic below.
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
  auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
  SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);

  // Subtract the final alloc from the SP. We work on the integer form of the
  // SP (the cast above emits a G_PTRTOINT) so we don't have to generate an
  // extra instruction to negate the alloc and then use G_PTR_ADD to add the
  // negative offset.
  auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
  if (Alignment > Align(1)) {
    // Round the new SP down to the requested alignment by AND-ing with
    // -Alignment (all-ones above the low log2(Alignment) bits).
    APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
    AlignMask.negate();
    auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
    Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
  }

  // Cast back to a pointer, commit the decremented SP, and return the same
  // value as the allocated block.
  SPTmp = MIRBuilder.buildCast(PtrTy, Alloc);
  MIRBuilder.buildCopy(SPReg, SPTmp);
  MIRBuilder.buildCopy(Dst, SPTmp);

  MI.eraseFromParent();
  return Legalized;
}
6337 
6338 LegalizerHelper::LegalizeResult
6339 LegalizerHelper::lowerExtract(MachineInstr &MI) {
6340   Register Dst = MI.getOperand(0).getReg();
6341   Register Src = MI.getOperand(1).getReg();
6342   unsigned Offset = MI.getOperand(2).getImm();
6343 
6344   LLT DstTy = MRI.getType(Dst);
6345   LLT SrcTy = MRI.getType(Src);
6346 
6347   if (DstTy.isScalar() &&
6348       (SrcTy.isScalar() ||
6349        (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
6350     LLT SrcIntTy = SrcTy;
6351     if (!SrcTy.isScalar()) {
6352       SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
6353       Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0);
6354     }
6355 
6356     if (Offset == 0)
6357       MIRBuilder.buildTrunc(Dst, Src);
6358     else {
6359       auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
6360       auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt);
6361       MIRBuilder.buildTrunc(Dst, Shr);
6362     }
6363 
6364     MI.eraseFromParent();
6365     return Legalized;
6366   }
6367 
6368   return UnableToLegalize;
6369 }
6370 
LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
  // Lower G_INSERT with integer bit manipulation: zero-extend the inserted
  // value, shift it into position, clear the corresponding bits of the
  // destination, and OR the two together.
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register InsertSrc = MI.getOperand(2).getReg();
  uint64_t Offset = MI.getOperand(3).getImm();

  // The type is intentionally queried via Src; Dst and Src carry the same
  // type for G_INSERT.
  LLT DstTy = MRI.getType(Src);
  LLT InsertTy = MRI.getType(InsertSrc);

  // Vector insert sources and mismatched element types are not handled here.
  if (InsertTy.isVector() ||
      (DstTy.isVector() && DstTy.getElementType() != InsertTy))
    return UnableToLegalize;

  // Pointers in non-integral address spaces can't round-trip through an
  // integer, which the bitwise lowering below requires.
  const DataLayout &DL = MIRBuilder.getDataLayout();
  if ((DstTy.isPointer() &&
       DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
      (InsertTy.isPointer() &&
       DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
    LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
    return UnableToLegalize;
  }

  // All arithmetic happens in an integer type of the destination's width.
  LLT IntDstTy = DstTy;

  if (!DstTy.isScalar()) {
    IntDstTy = LLT::scalar(DstTy.getSizeInBits());
    Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
  }

  if (!InsertTy.isScalar()) {
    const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
    InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
  }

  // Widen the inserted value to the full destination width and shift it to
  // its field position.
  Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
  if (Offset != 0) {
    auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
    ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
  }

  // Wrapping mask: set everywhere except [Offset, Offset + InsertSize), so
  // AND-ing preserves the destination bits outside the inserted field.
  APInt MaskVal = APInt::getBitsSetWithWrap(
      DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);

  auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
  auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
  auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);

  // Convert the combined integer back to the original destination type.
  MIRBuilder.buildCast(Dst, Or);
  MI.eraseFromParent();
  return Legalized;
}
6422 
6423 LegalizerHelper::LegalizeResult
6424 LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
6425   Register Dst0 = MI.getOperand(0).getReg();
6426   Register Dst1 = MI.getOperand(1).getReg();
6427   Register LHS = MI.getOperand(2).getReg();
6428   Register RHS = MI.getOperand(3).getReg();
6429   const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
6430 
6431   LLT Ty = MRI.getType(Dst0);
6432   LLT BoolTy = MRI.getType(Dst1);
6433 
6434   if (IsAdd)
6435     MIRBuilder.buildAdd(Dst0, LHS, RHS);
6436   else
6437     MIRBuilder.buildSub(Dst0, LHS, RHS);
6438 
6439   // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
6440 
6441   auto Zero = MIRBuilder.buildConstant(Ty, 0);
6442 
6443   // For an addition, the result should be less than one of the operands (LHS)
6444   // if and only if the other operand (RHS) is negative, otherwise there will
6445   // be overflow.
6446   // For a subtraction, the result should be less than one of the operands
6447   // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
6448   // otherwise there will be overflow.
6449   auto ResultLowerThanLHS =
6450       MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
6451   auto ConditionRHS = MIRBuilder.buildICmp(
6452       IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
6453 
6454   MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
6455   MI.eraseFromParent();
6456   return Legalized;
6457 }
6458 
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
  // Lower G_[SU]{ADD,SUB}SAT using min/max: clamp RHS to the range that can't
  // overflow for the given LHS, then perform the plain (wrapping) add/sub.
  Register Res = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Res);
  bool IsSigned;
  bool IsAdd;
  unsigned BaseOp; // The wrapping arithmetic op applied after clamping.
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected addsat/subsat opcode");
  case TargetOpcode::G_UADDSAT:
    IsSigned = false;
    IsAdd = true;
    BaseOp = TargetOpcode::G_ADD;
    break;
  case TargetOpcode::G_SADDSAT:
    IsSigned = true;
    IsAdd = true;
    BaseOp = TargetOpcode::G_ADD;
    break;
  case TargetOpcode::G_USUBSAT:
    IsSigned = false;
    IsAdd = false;
    BaseOp = TargetOpcode::G_SUB;
    break;
  case TargetOpcode::G_SSUBSAT:
    IsSigned = true;
    IsAdd = false;
    BaseOp = TargetOpcode::G_SUB;
    break;
  }

  if (IsSigned) {
    // sadd.sat(a, b) ->
    //   hi = 0x7fffffff - smax(a, 0)
    //   lo = 0x80000000 - smin(a, 0)
    //   a + smin(smax(lo, b), hi)
    // ssub.sat(a, b) ->
    //   lo = smax(a, -1) - 0x7fffffff
    //   hi = smin(a, -1) - 0x80000000
    //   a - smin(smax(lo, b), hi)
    // TODO: AMDGPU can use a "median of 3" instruction here:
    //   a +/- med3(lo, b, hi)
    uint64_t NumBits = Ty.getScalarSizeInBits();
    auto MaxVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
    auto MinVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
    MachineInstrBuilder Hi, Lo;
    if (IsAdd) {
      auto Zero = MIRBuilder.buildConstant(Ty, 0);
      Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
      Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
    } else {
      auto NegOne = MIRBuilder.buildConstant(Ty, -1);
      Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
                               MaxVal);
      Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
                               MinVal);
    }
    // Clamp RHS to [Lo, Hi]; the subsequent add/sub then cannot wrap.
    auto RHSClamped =
        MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
    MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
  } else {
    // uadd.sat(a, b) -> a + umin(~a, b)
    // usub.sat(a, b) -> a - umin(a, b)
    Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
    auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
    MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
  }

  MI.eraseFromParent();
  return Legalized;
}
6535 
6536 LegalizerHelper::LegalizeResult
6537 LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
6538   Register Res = MI.getOperand(0).getReg();
6539   Register LHS = MI.getOperand(1).getReg();
6540   Register RHS = MI.getOperand(2).getReg();
6541   LLT Ty = MRI.getType(Res);
6542   LLT BoolTy = Ty.changeElementSize(1);
6543   bool IsSigned;
6544   bool IsAdd;
6545   unsigned OverflowOp;
6546   switch (MI.getOpcode()) {
6547   default:
6548     llvm_unreachable("unexpected addsat/subsat opcode");
6549   case TargetOpcode::G_UADDSAT:
6550     IsSigned = false;
6551     IsAdd = true;
6552     OverflowOp = TargetOpcode::G_UADDO;
6553     break;
6554   case TargetOpcode::G_SADDSAT:
6555     IsSigned = true;
6556     IsAdd = true;
6557     OverflowOp = TargetOpcode::G_SADDO;
6558     break;
6559   case TargetOpcode::G_USUBSAT:
6560     IsSigned = false;
6561     IsAdd = false;
6562     OverflowOp = TargetOpcode::G_USUBO;
6563     break;
6564   case TargetOpcode::G_SSUBSAT:
6565     IsSigned = true;
6566     IsAdd = false;
6567     OverflowOp = TargetOpcode::G_SSUBO;
6568     break;
6569   }
6570 
6571   auto OverflowRes =
6572       MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
6573   Register Tmp = OverflowRes.getReg(0);
6574   Register Ov = OverflowRes.getReg(1);
6575   MachineInstrBuilder Clamp;
6576   if (IsSigned) {
6577     // sadd.sat(a, b) ->
6578     //   {tmp, ov} = saddo(a, b)
6579     //   ov ? (tmp >>s 31) + 0x80000000 : r
6580     // ssub.sat(a, b) ->
6581     //   {tmp, ov} = ssubo(a, b)
6582     //   ov ? (tmp >>s 31) + 0x80000000 : r
6583     uint64_t NumBits = Ty.getScalarSizeInBits();
6584     auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
6585     auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
6586     auto MinVal =
6587         MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
6588     Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
6589   } else {
6590     // uadd.sat(a, b) ->
6591     //   {tmp, ov} = uaddo(a, b)
6592     //   ov ? 0xffffffff : tmp
6593     // usub.sat(a, b) ->
6594     //   {tmp, ov} = usubo(a, b)
6595     //   ov ? 0 : tmp
6596     Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
6597   }
6598   MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
6599 
6600   MI.eraseFromParent();
6601   return Legalized;
6602 }
6603 
6604 LegalizerHelper::LegalizeResult
6605 LegalizerHelper::lowerShlSat(MachineInstr &MI) {
6606   assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
6607           MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
6608          "Expected shlsat opcode!");
6609   bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
6610   Register Res = MI.getOperand(0).getReg();
6611   Register LHS = MI.getOperand(1).getReg();
6612   Register RHS = MI.getOperand(2).getReg();
6613   LLT Ty = MRI.getType(Res);
6614   LLT BoolTy = Ty.changeElementSize(1);
6615 
6616   unsigned BW = Ty.getScalarSizeInBits();
6617   auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
6618   auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
6619                        : MIRBuilder.buildLShr(Ty, Result, RHS);
6620 
6621   MachineInstrBuilder SatVal;
6622   if (IsSigned) {
6623     auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
6624     auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
6625     auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
6626                                     MIRBuilder.buildConstant(Ty, 0));
6627     SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
6628   } else {
6629     SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
6630   }
6631   auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
6632   MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
6633 
6634   MI.eraseFromParent();
6635   return Legalized;
6636 }
6637 
6638 LegalizerHelper::LegalizeResult
6639 LegalizerHelper::lowerBswap(MachineInstr &MI) {
6640   Register Dst = MI.getOperand(0).getReg();
6641   Register Src = MI.getOperand(1).getReg();
6642   const LLT Ty = MRI.getType(Src);
6643   unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
6644   unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
6645 
6646   // Swap most and least significant byte, set remaining bytes in Res to zero.
6647   auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
6648   auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
6649   auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
6650   auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
6651 
6652   // Set i-th high/low byte in Res to i-th low/high byte from Src.
6653   for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
6654     // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
6655     APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
6656     auto Mask = MIRBuilder.buildConstant(Ty, APMask);
6657     auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
6658     // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
6659     auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
6660     auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
6661     Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
6662     // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
6663     auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
6664     auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
6665     Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
6666   }
6667   Res.getInstr()->getOperand(0).setReg(Dst);
6668 
6669   MI.eraseFromParent();
6670   return Legalized;
6671 }
6672 
6673 //{ (Src & Mask) >> N } | { (Src << N) & Mask }
6674 static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
6675                                  MachineInstrBuilder Src, APInt Mask) {
6676   const LLT Ty = Dst.getLLTTy(*B.getMRI());
6677   MachineInstrBuilder C_N = B.buildConstant(Ty, N);
6678   MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
6679   auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
6680   auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
6681   return B.buildOr(Dst, LHS, RHS);
6682 }
6683 
6684 LegalizerHelper::LegalizeResult
6685 LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
6686   Register Dst = MI.getOperand(0).getReg();
6687   Register Src = MI.getOperand(1).getReg();
6688   const LLT Ty = MRI.getType(Src);
6689   unsigned Size = Ty.getSizeInBits();
6690 
6691   MachineInstrBuilder BSWAP =
6692       MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
6693 
6694   // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
6695   //    [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
6696   // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
6697   MachineInstrBuilder Swap4 =
6698       SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
6699 
6700   // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
6701   //    [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
6702   // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
6703   MachineInstrBuilder Swap2 =
6704       SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
6705 
6706   // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
6707   //    [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
6708   // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
6709   SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
6710 
6711   MI.eraseFromParent();
6712   return Legalized;
6713 }
6714 
6715 LegalizerHelper::LegalizeResult
6716 LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
6717   MachineFunction &MF = MIRBuilder.getMF();
6718 
6719   bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
6720   int NameOpIdx = IsRead ? 1 : 0;
6721   int ValRegIndex = IsRead ? 0 : 1;
6722 
6723   Register ValReg = MI.getOperand(ValRegIndex).getReg();
6724   const LLT Ty = MRI.getType(ValReg);
6725   const MDString *RegStr = cast<MDString>(
6726     cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
6727 
6728   Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
6729   if (!PhysReg.isValid())
6730     return UnableToLegalize;
6731 
6732   if (IsRead)
6733     MIRBuilder.buildCopy(ValReg, PhysReg);
6734   else
6735     MIRBuilder.buildCopy(PhysReg, ValReg);
6736 
6737   MI.eraseFromParent();
6738   return Legalized;
6739 }
6740 
6741 LegalizerHelper::LegalizeResult
6742 LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
6743   bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
6744   unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
6745   Register Result = MI.getOperand(0).getReg();
6746   LLT OrigTy = MRI.getType(Result);
6747   auto SizeInBits = OrigTy.getScalarSizeInBits();
6748   LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
6749 
6750   auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
6751   auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
6752   auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
6753   unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
6754 
6755   auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
6756   auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
6757   MIRBuilder.buildTrunc(Result, Shifted);
6758 
6759   MI.eraseFromParent();
6760   return Legalized;
6761 }
6762 
LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
  // Implement vector G_SELECT in terms of XOR, AND, OR.
  Register DstReg = MI.getOperand(0).getReg();
  Register MaskReg = MI.getOperand(1).getReg();
  Register Op1Reg = MI.getOperand(2).getReg();
  Register Op2Reg = MI.getOperand(3).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT MaskTy = MRI.getType(MaskReg);
  LLT Op1Ty = MRI.getType(Op1Reg);
  // Scalar selects are not handled by this lowering.
  if (!DstTy.isVector())
    return UnableToLegalize;

  // Vector selects can have a scalar predicate. If so, splat into a vector and
  // finish for later legalization attempts to try again.
  if (MaskTy.isScalar()) {
    Register MaskElt = MaskReg;
    // Sign-extend the predicate to element width — presumably so a true
    // predicate becomes an all-ones element, matching the bitwise expansion
    // below.
    if (MaskTy.getSizeInBits() < DstTy.getScalarSizeInBits())
      MaskElt = MIRBuilder.buildSExt(DstTy.getElementType(), MaskElt).getReg(0);
    // Generate a vector splat idiom to be pattern matched later.
    auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
    Observer.changingInstr(MI);
    MI.getOperand(1).setReg(ShufSplat.getReg(0));
    Observer.changedInstr(MI);
    return Legalized;
  }

  // The bitwise expansion requires the mask and the operands to have the
  // same total bit width.
  if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits()) {
    return UnableToLegalize;
  }

  // Dst = (Op1 & Mask) | (Op2 & ~Mask). NOTE(review): the ANDs/OR are built
  // with MaskTy while the result is DstTy — same size, but confirm the
  // element layouts agree for all callers.
  auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
  auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
  auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
  MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
  MI.eraseFromParent();
  return Legalized;
}
6800 
6801 LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
6802   // Split DIVREM into individual instructions.
6803   unsigned Opcode = MI.getOpcode();
6804 
6805   MIRBuilder.buildInstr(
6806       Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
6807                                         : TargetOpcode::G_UDIV,
6808       {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
6809   MIRBuilder.buildInstr(
6810       Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
6811                                         : TargetOpcode::G_UREM,
6812       {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
6813   MI.eraseFromParent();
6814   return Legalized;
6815 }
6816