1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file implements the LegalizerHelper class to legalize
10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
11 /// primary legalization.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
16 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
17 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
18 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/CodeGen/TargetFrameLowering.h"
21 #include "llvm/CodeGen/TargetInstrInfo.h"
22 #include "llvm/CodeGen/TargetLowering.h"
23 #include "llvm/CodeGen/TargetSubtargetInfo.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
27 
28 #define DEBUG_TYPE "legalizer"
29 
30 using namespace llvm;
31 using namespace LegalizeActions;
32 
33 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
34 ///
35 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
36 /// with any leftover piece as type \p LeftoverTy
37 ///
38 /// Returns -1 in the first element of the pair if the breakdown is not
39 /// satisfiable.
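///
/// Worked example (illustrative only): breaking an s64 \p OrigTy into s16
/// pieces yields {4, 0}; breaking an s100 \p OrigTy into s32 pieces yields
/// {3, 1} with \p LeftoverTy set to s4.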
40 static std::pair<int, int>
41 getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
42   assert(!LeftoverTy.isValid() && "this is an out argument");
43 
44   unsigned Size = OrigTy.getSizeInBits();
45   unsigned NarrowSize = NarrowTy.getSizeInBits();
46   unsigned NumParts = Size / NarrowSize;
47   unsigned LeftoverSize = Size - NumParts * NarrowSize;
48   assert(Size > NarrowSize);
49 
50   if (LeftoverSize == 0)
51     return {NumParts, 0};
52 
53   if (NarrowTy.isVector()) {
54     unsigned EltSize = OrigTy.getScalarSizeInBits();
55     if (LeftoverSize % EltSize != 0)
56       return {-1, -1};
57     LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
58   } else {
59     LeftoverTy = LLT::scalar(LeftoverSize);
60   }
61 
62   int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
63   return std::make_pair(NumParts, NumLeftover);
64 }
65 
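// Map a scalar LLT to the IR floating point type of the same width (s16 ->
// half, s32 -> float, s64 -> double, s128 -> fp128). Returns nullptr for any
// other type.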
66 static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
67 
68   if (!Ty.isScalar())
69     return nullptr;
70 
71   switch (Ty.getSizeInBits()) {
72   case 16:
73     return Type::getHalfTy(Ctx);
74   case 32:
75     return Type::getFloatTy(Ctx);
76   case 64:
77     return Type::getDoubleTy(Ctx);
78   case 128:
79     return Type::getFP128Ty(Ctx);
80   default:
81     return nullptr;
82   }
83 }
84 
85 LegalizerHelper::LegalizerHelper(MachineFunction &MF,
86                                  GISelChangeObserver &Observer,
87                                  MachineIRBuilder &Builder)
88     : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
89       LI(*MF.getSubtarget().getLegalizerInfo()) {
90   MIRBuilder.setChangeObserver(Observer);
91 }
92 
93 LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
94                                  GISelChangeObserver &Observer,
95                                  MachineIRBuilder &B)
96     : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI) {
97   MIRBuilder.setChangeObserver(Observer);
}

LegalizerHelper::LegalizeResult
100 LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
101   LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
102 
103   MIRBuilder.setInstrAndDebugLoc(MI);
104 
105   if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
106       MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
107     return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
108   auto Step = LI.getAction(MI, MRI);
109   switch (Step.Action) {
110   case Legal:
111     LLVM_DEBUG(dbgs() << ".. Already legal\n");
112     return AlreadyLegal;
113   case Libcall:
114     LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
115     return libcall(MI);
116   case NarrowScalar:
117     LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
118     return narrowScalar(MI, Step.TypeIdx, Step.NewType);
119   case WidenScalar:
120     LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
121     return widenScalar(MI, Step.TypeIdx, Step.NewType);
122   case Bitcast:
123     LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
124     return bitcast(MI, Step.TypeIdx, Step.NewType);
125   case Lower:
126     LLVM_DEBUG(dbgs() << ".. Lower\n");
127     return lower(MI, Step.TypeIdx, Step.NewType);
128   case FewerElements:
129     LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
130     return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
131   case MoreElements:
132     LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
133     return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
134   case Custom:
135     LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
136     return LI.legalizeCustom(*this, MI) ? Legalized : UnableToLegalize;
137   default:
138     LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
139     return UnableToLegalize;
140   }
141 }
142 
143 void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
144                                    SmallVectorImpl<Register> &VRegs) {
145   for (int i = 0; i < NumParts; ++i)
146     VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
147   MIRBuilder.buildUnmerge(VRegs, Reg);
148 }
149 
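// Split \p Reg (of type \p RegTy) into as many \p MainTy pieces as fit,
// returning them in \p VRegs; any remaining bits are returned in
// \p LeftoverRegs as pieces of \p LeftoverTy. For example (illustrative),
// splitting an s64 register with an s24 MainTy produces two s24 extracts at
// bit offsets 0 and 24 plus one s16 leftover extract at offset 48.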
150 bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
151                                    LLT MainTy, LLT &LeftoverTy,
152                                    SmallVectorImpl<Register> &VRegs,
153                                    SmallVectorImpl<Register> &LeftoverRegs) {
154   assert(!LeftoverTy.isValid() && "this is an out argument");
155 
156   unsigned RegSize = RegTy.getSizeInBits();
157   unsigned MainSize = MainTy.getSizeInBits();
158   unsigned NumParts = RegSize / MainSize;
159   unsigned LeftoverSize = RegSize - NumParts * MainSize;
160 
161   // Use an unmerge when possible.
162   if (LeftoverSize == 0) {
163     for (unsigned I = 0; I < NumParts; ++I)
164       VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
165     MIRBuilder.buildUnmerge(VRegs, Reg);
166     return true;
167   }
168 
169   if (MainTy.isVector()) {
170     unsigned EltSize = MainTy.getScalarSizeInBits();
171     if (LeftoverSize % EltSize != 0)
172       return false;
173     LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
174   } else {
175     LeftoverTy = LLT::scalar(LeftoverSize);
176   }
177 
178   // For irregular sizes, extract the individual parts.
179   for (unsigned I = 0; I != NumParts; ++I) {
180     Register NewReg = MRI.createGenericVirtualRegister(MainTy);
181     VRegs.push_back(NewReg);
182     MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
183   }
184 
185   for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
186        Offset += LeftoverSize) {
187     Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
188     LeftoverRegs.push_back(NewReg);
189     MIRBuilder.buildExtract(NewReg, Reg, Offset);
190   }
191 
192   return true;
193 }
194 
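// Inverse of the irregular extractParts above: reassemble \p DstReg of type
// \p ResultTy from the \p PartTy pieces in \p PartRegs plus any \p LeftoverTy
// pieces in \p LeftoverRegs, using a plain merge, concat_vectors or
// build_vector when the pieces cover the result exactly, and a chain of
// G_INSERTs into an undef value otherwise.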
195 void LegalizerHelper::insertParts(Register DstReg,
196                                   LLT ResultTy, LLT PartTy,
197                                   ArrayRef<Register> PartRegs,
198                                   LLT LeftoverTy,
199                                   ArrayRef<Register> LeftoverRegs) {
200   if (!LeftoverTy.isValid()) {
201     assert(LeftoverRegs.empty());
202 
203     if (!ResultTy.isVector()) {
204       MIRBuilder.buildMerge(DstReg, PartRegs);
205       return;
206     }
207 
208     if (PartTy.isVector())
209       MIRBuilder.buildConcatVectors(DstReg, PartRegs);
210     else
211       MIRBuilder.buildBuildVector(DstReg, PartRegs);
212     return;
213   }
214 
215   unsigned PartSize = PartTy.getSizeInBits();
216   unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();
217 
218   Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
219   MIRBuilder.buildUndef(CurResultReg);
220 
221   unsigned Offset = 0;
222   for (Register PartReg : PartRegs) {
223     Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
224     MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
225     CurResultReg = NewResultReg;
226     Offset += PartSize;
227   }
228 
229   for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
230     // Use the original output register for the final insert to avoid a copy.
231     Register NewResultReg = (I + 1 == E) ?
232       DstReg : MRI.createGenericVirtualRegister(ResultTy);
233 
234     MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
235     CurResultReg = NewResultReg;
236     Offset += LeftoverPartSize;
237   }
238 }
239 
240 /// Return the result registers of G_UNMERGE_VALUES \p MI in \p Regs
241 static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
242                               const MachineInstr &MI) {
243   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
244 
245   const int NumResults = MI.getNumOperands() - 1;
246   Regs.resize(NumResults);
247   for (int I = 0; I != NumResults; ++I)
248     Regs[I] = MI.getOperand(I).getReg();
249 }
250 
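// Break \p SrcReg into pieces of the greatest common divisor type of its own
// type, \p NarrowTy and \p DstTy, appending them to \p Parts and returning
// that GCD type. For example (illustrative), with SrcTy = s32, NarrowTy = s24
// and DstTy = s64 the GCD type is s8 and the source is unmerged into four s8
// pieces.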
251 LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
252                                     LLT NarrowTy, Register SrcReg) {
253   LLT SrcTy = MRI.getType(SrcReg);
254 
255   LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
256   if (SrcTy == GCDTy) {
257     // If the source already evenly divides the result type, we don't need to do
258     // anything.
259     Parts.push_back(SrcReg);
260   } else {
261     // Need to split into common type sized pieces.
262     auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
263     getUnmergeResults(Parts, *Unmerge);
264   }
265 
266   return GCDTy;
267 }
268 
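// Combine the \p GCDTy pieces in \p VRegs into \p NarrowTy sized pieces
// covering the least common multiple of \p DstTy and \p NarrowTy. When the
// original pieces do not fill the LCM type, padding is produced according to
// \p PadStrategy (zero for G_ZEXT, undef for G_ANYEXT, a sign-bit splat for
// G_SEXT). \p VRegs is replaced with the NarrowTy pieces and the LCM type is
// returned.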
269 LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
270                                          SmallVectorImpl<Register> &VRegs,
271                                          unsigned PadStrategy) {
272   LLT LCMTy = getLCMType(DstTy, NarrowTy);
273 
274   int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
275   int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
276   int NumOrigSrc = VRegs.size();
277 
278   Register PadReg;
279 
280   // Get a value we can use to pad the source value if the sources won't evenly
281   // cover the result type.
282   if (NumOrigSrc < NumParts * NumSubParts) {
283     if (PadStrategy == TargetOpcode::G_ZEXT)
284       PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
285     else if (PadStrategy == TargetOpcode::G_ANYEXT)
286       PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
287     else {
288       assert(PadStrategy == TargetOpcode::G_SEXT);
289 
290       // Shift the sign bit of the low register through the high register.
291       auto ShiftAmt =
292         MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
293       PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
294     }
295   }
296 
297   // Registers for the final merge to be produced.
298   SmallVector<Register, 4> Remerge(NumParts);
299 
300   // Registers needed for intermediate merges, which will be merged into a
301   // source for Remerge.
302   SmallVector<Register, 4> SubMerge(NumSubParts);
303 
304   // Once we've fully read off the end of the original source bits, we can reuse
305   // the same high bits for remaining padding elements.
306   Register AllPadReg;
307 
308   // Build merges to the LCM type to cover the original result type.
309   for (int I = 0; I != NumParts; ++I) {
310     bool AllMergePartsArePadding = true;
311 
312     // Build the requested merges to the requested type.
313     for (int J = 0; J != NumSubParts; ++J) {
314       int Idx = I * NumSubParts + J;
315       if (Idx >= NumOrigSrc) {
316         SubMerge[J] = PadReg;
317         continue;
318       }
319 
320       SubMerge[J] = VRegs[Idx];
321 
322       // There are meaningful bits here we can't reuse later.
323       AllMergePartsArePadding = false;
324     }
325 
326     // If we've filled up a complete piece with padding bits, we can directly
327     // emit the natural sized constant if applicable, rather than a merge of
328     // smaller constants.
329     if (AllMergePartsArePadding && !AllPadReg) {
330       if (PadStrategy == TargetOpcode::G_ANYEXT)
331         AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
332       else if (PadStrategy == TargetOpcode::G_ZEXT)
333         AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
334 
335       // If this is a sign extension, we can't materialize a trivial constant
336       // with the right type and have to produce a merge.
337     }
338 
339     if (AllPadReg) {
340       // Avoid creating additional instructions if we're just adding additional
341       // copies of padding bits.
342       Remerge[I] = AllPadReg;
343       continue;
344     }
345 
346     if (NumSubParts == 1)
347       Remerge[I] = SubMerge[0];
348     else
349       Remerge[I] = MIRBuilder.buildMerge(NarrowTy, SubMerge).getReg(0);
350 
351     // In the sign extend padding case, re-use the first all-signbit merge.
352     if (AllMergePartsArePadding && !AllPadReg)
353       AllPadReg = Remerge[I];
354   }
355 
356   VRegs = std::move(Remerge);
357   return LCMTy;
358 }
359 
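// Merge \p RemergeRegs (of the LCM type built above) and narrow the result
// back into \p DstReg: directly if the types match, via G_TRUNC for scalars,
// or via a G_EXTRACT at offset 0 when \p LCMTy is a vector.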
360 void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
361                                                ArrayRef<Register> RemergeRegs) {
362   LLT DstTy = MRI.getType(DstReg);
363 
364   // Create the merge to the widened source, and extract the relevant bits into
365   // the result.
366 
367   if (DstTy == LCMTy) {
368     MIRBuilder.buildMerge(DstReg, RemergeRegs);
369     return;
370   }
371 
372   auto Remerge = MIRBuilder.buildMerge(LCMTy, RemergeRegs);
373   if (DstTy.isScalar() && LCMTy.isScalar()) {
374     MIRBuilder.buildTrunc(DstReg, Remerge);
375     return;
376   }
377 
378   if (LCMTy.isVector()) {
379     MIRBuilder.buildExtract(DstReg, Remerge, 0);
380     return;
381   }
382 
383   llvm_unreachable("unhandled case");
384 }
385 
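// Map a generic opcode and scalar size onto the corresponding runtime library
// call, e.g. G_FADD with Size == 64 selects RTLIB::ADD_F64 via the RTLIBCASE
// macro below.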
386 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
387 #define RTLIBCASE(LibcallPrefix)                                               \
388   do {                                                                         \
389     switch (Size) {                                                            \
390     case 32:                                                                   \
391       return RTLIB::LibcallPrefix##32;                                         \
392     case 64:                                                                   \
393       return RTLIB::LibcallPrefix##64;                                         \
394     case 128:                                                                  \
395       return RTLIB::LibcallPrefix##128;                                        \
396     default:                                                                   \
397       llvm_unreachable("unexpected size");                                     \
398     }                                                                          \
399   } while (0)
400 
401   assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
402 
403   switch (Opcode) {
404   case TargetOpcode::G_SDIV:
405     RTLIBCASE(SDIV_I);
406   case TargetOpcode::G_UDIV:
407     RTLIBCASE(UDIV_I);
408   case TargetOpcode::G_SREM:
409     RTLIBCASE(SREM_I);
410   case TargetOpcode::G_UREM:
411     RTLIBCASE(UREM_I);
412   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
413     RTLIBCASE(CTLZ_I);
414   case TargetOpcode::G_FADD:
415     RTLIBCASE(ADD_F);
416   case TargetOpcode::G_FSUB:
417     RTLIBCASE(SUB_F);
418   case TargetOpcode::G_FMUL:
419     RTLIBCASE(MUL_F);
420   case TargetOpcode::G_FDIV:
421     RTLIBCASE(DIV_F);
422   case TargetOpcode::G_FEXP:
423     RTLIBCASE(EXP_F);
424   case TargetOpcode::G_FEXP2:
425     RTLIBCASE(EXP2_F);
426   case TargetOpcode::G_FREM:
427     RTLIBCASE(REM_F);
428   case TargetOpcode::G_FPOW:
429     RTLIBCASE(POW_F);
430   case TargetOpcode::G_FMA:
431     RTLIBCASE(FMA_F);
432   case TargetOpcode::G_FSIN:
433     RTLIBCASE(SIN_F);
434   case TargetOpcode::G_FCOS:
435     RTLIBCASE(COS_F);
436   case TargetOpcode::G_FLOG10:
437     RTLIBCASE(LOG10_F);
438   case TargetOpcode::G_FLOG:
439     RTLIBCASE(LOG_F);
440   case TargetOpcode::G_FLOG2:
441     RTLIBCASE(LOG2_F);
442   case TargetOpcode::G_FCEIL:
443     RTLIBCASE(CEIL_F);
444   case TargetOpcode::G_FFLOOR:
445     RTLIBCASE(FLOOR_F);
446   case TargetOpcode::G_FMINNUM:
447     RTLIBCASE(FMIN_F);
448   case TargetOpcode::G_FMAXNUM:
449     RTLIBCASE(FMAX_F);
450   case TargetOpcode::G_FSQRT:
451     RTLIBCASE(SQRT_F);
452   case TargetOpcode::G_FRINT:
453     RTLIBCASE(RINT_F);
454   case TargetOpcode::G_FNEARBYINT:
455     RTLIBCASE(NEARBYINT_F);
456   }
457   llvm_unreachable("Unknown libcall function");
458 }
459 
460 /// True if an instruction is in tail position in its caller. Intended for
461 /// legalizing libcalls as tail calls when possible.
462 static bool isLibCallInTailPosition(const TargetInstrInfo &TII,
463                                     MachineInstr &MI) {
464   MachineBasicBlock &MBB = *MI.getParent();
465   const Function &F = MBB.getParent()->getFunction();
466 
467   // Conservatively require the attributes of the call to match those of
468   // the return. Ignore NoAlias and NonNull because they don't affect the
469   // call sequence.
470   AttributeList CallerAttrs = F.getAttributes();
471   if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
472           .removeAttribute(Attribute::NoAlias)
473           .removeAttribute(Attribute::NonNull)
474           .hasAttributes())
475     return false;
476 
477   // It's not safe to eliminate the sign / zero extension of the return value.
478   if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
479       CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
480     return false;
481 
482   // Only tail call if the following instruction is a standard return.
483   auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
484   if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
485     return false;
486 
487   return true;
488 }
489 
490 LegalizerHelper::LegalizeResult
491 llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
492                     const CallLowering::ArgInfo &Result,
493                     ArrayRef<CallLowering::ArgInfo> Args,
494                     const CallingConv::ID CC) {
495   auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
496 
497   CallLowering::CallLoweringInfo Info;
498   Info.CallConv = CC;
499   Info.Callee = MachineOperand::CreateES(Name);
500   Info.OrigRet = Result;
501   std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
502   if (!CLI.lowerCall(MIRBuilder, Info))
503     return LegalizerHelper::UnableToLegalize;
504 
505   return LegalizerHelper::Legalized;
506 }
507 
508 LegalizerHelper::LegalizeResult
509 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
510                     const CallLowering::ArgInfo &Result,
511                     ArrayRef<CallLowering::ArgInfo> Args) {
512   auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
513   const char *Name = TLI.getLibcallName(Libcall);
514   const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
515   return createLibcall(MIRBuilder, Name, Result, Args, CC);
516 }
517 
518 // Useful for libcalls where all operands have the same type.
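// For example, an s64 G_FREM reaches here with OpType double and becomes a
// call to the RTLIB::REM_F64 routine (typically fmod).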
519 static LegalizerHelper::LegalizeResult
520 simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
521               Type *OpType) {
522   auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
523 
524   SmallVector<CallLowering::ArgInfo, 3> Args;
525   for (unsigned i = 1; i < MI.getNumOperands(); i++)
526     Args.push_back({MI.getOperand(i).getReg(), OpType});
527   return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
528                        Args);
529 }
530 
531 LegalizerHelper::LegalizeResult
532 llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
533                        MachineInstr &MI) {
534   assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
535   auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
536 
537   SmallVector<CallLowering::ArgInfo, 3> Args;
538   // Add all the args, except for the last which is an imm denoting 'tail'.
539   for (unsigned i = 1; i < MI.getNumOperands() - 1; i++) {
540     Register Reg = MI.getOperand(i).getReg();
541 
    // Need to derive an IR type for call lowering.
543     LLT OpLLT = MRI.getType(Reg);
544     Type *OpTy = nullptr;
545     if (OpLLT.isPointer())
546       OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
547     else
548       OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
549     Args.push_back({Reg, OpTy});
550   }
551 
552   auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
553   auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
554   Intrinsic::ID ID = MI.getOperand(0).getIntrinsicID();
555   RTLIB::Libcall RTLibcall;
556   switch (ID) {
557   case Intrinsic::memcpy:
558     RTLibcall = RTLIB::MEMCPY;
559     break;
560   case Intrinsic::memset:
561     RTLibcall = RTLIB::MEMSET;
562     break;
563   case Intrinsic::memmove:
564     RTLibcall = RTLIB::MEMMOVE;
565     break;
566   default:
567     return LegalizerHelper::UnableToLegalize;
568   }
569   const char *Name = TLI.getLibcallName(RTLibcall);
570 
571   MIRBuilder.setInstrAndDebugLoc(MI);
572 
573   CallLowering::CallLoweringInfo Info;
574   Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
575   Info.Callee = MachineOperand::CreateES(Name);
576   Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
577   Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() == 1 &&
578                     isLibCallInTailPosition(MIRBuilder.getTII(), MI);
579 
580   std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
581   if (!CLI.lowerCall(MIRBuilder, Info))
582     return LegalizerHelper::UnableToLegalize;
583 
584   if (Info.LoweredTailCall) {
585     assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
586     // We must have a return following the call (or debug insts) to get past
587     // isLibCallInTailPosition.
588     do {
589       MachineInstr *Next = MI.getNextNode();
590       assert(Next && (Next->isReturn() || Next->isDebugInstr()) &&
591              "Expected instr following MI to be return or debug inst?");
592       // We lowered a tail call, so the call is now the return from the block.
593       // Delete the old return.
594       Next->eraseFromParent();
595     } while (MI.getNextNode());
596   }
597 
598   return LegalizerHelper::Legalized;
599 }
600 
601 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
602                                        Type *FromType) {
603   auto ToMVT = MVT::getVT(ToType);
604   auto FromMVT = MVT::getVT(FromType);
605 
606   switch (Opcode) {
607   case TargetOpcode::G_FPEXT:
608     return RTLIB::getFPEXT(FromMVT, ToMVT);
609   case TargetOpcode::G_FPTRUNC:
610     return RTLIB::getFPROUND(FromMVT, ToMVT);
611   case TargetOpcode::G_FPTOSI:
612     return RTLIB::getFPTOSINT(FromMVT, ToMVT);
613   case TargetOpcode::G_FPTOUI:
614     return RTLIB::getFPTOUINT(FromMVT, ToMVT);
615   case TargetOpcode::G_SITOFP:
616     return RTLIB::getSINTTOFP(FromMVT, ToMVT);
617   case TargetOpcode::G_UITOFP:
618     return RTLIB::getUINTTOFP(FromMVT, ToMVT);
619   }
620   llvm_unreachable("Unsupported libcall function");
621 }
622 
623 static LegalizerHelper::LegalizeResult
624 conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
625                   Type *FromType) {
626   RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
627   return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
628                        {{MI.getOperand(1).getReg(), FromType}});
629 }
630 
631 LegalizerHelper::LegalizeResult
632 LegalizerHelper::libcall(MachineInstr &MI) {
633   LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
634   unsigned Size = LLTy.getSizeInBits();
635   auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
636 
637   switch (MI.getOpcode()) {
638   default:
639     return UnableToLegalize;
640   case TargetOpcode::G_SDIV:
641   case TargetOpcode::G_UDIV:
642   case TargetOpcode::G_SREM:
643   case TargetOpcode::G_UREM:
644   case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
645     Type *HLTy = IntegerType::get(Ctx, Size);
646     auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
647     if (Status != Legalized)
648       return Status;
649     break;
650   }
651   case TargetOpcode::G_FADD:
652   case TargetOpcode::G_FSUB:
653   case TargetOpcode::G_FMUL:
654   case TargetOpcode::G_FDIV:
655   case TargetOpcode::G_FMA:
656   case TargetOpcode::G_FPOW:
657   case TargetOpcode::G_FREM:
658   case TargetOpcode::G_FCOS:
659   case TargetOpcode::G_FSIN:
660   case TargetOpcode::G_FLOG10:
661   case TargetOpcode::G_FLOG:
662   case TargetOpcode::G_FLOG2:
663   case TargetOpcode::G_FEXP:
664   case TargetOpcode::G_FEXP2:
665   case TargetOpcode::G_FCEIL:
666   case TargetOpcode::G_FFLOOR:
667   case TargetOpcode::G_FMINNUM:
668   case TargetOpcode::G_FMAXNUM:
669   case TargetOpcode::G_FSQRT:
670   case TargetOpcode::G_FRINT:
671   case TargetOpcode::G_FNEARBYINT: {
672     Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
673     if (!HLTy || (Size != 32 && Size != 64 && Size != 128)) {
674       LLVM_DEBUG(dbgs() << "No libcall available for size " << Size << ".\n");
675       return UnableToLegalize;
676     }
677     auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
678     if (Status != Legalized)
679       return Status;
680     break;
681   }
682   case TargetOpcode::G_FPEXT:
683   case TargetOpcode::G_FPTRUNC: {
    Type *FromTy =
        getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
    Type *ToTy =
        getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
    if (!FromTy || !ToTy)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy);
689     if (Status != Legalized)
690       return Status;
691     break;
692   }
693   case TargetOpcode::G_FPTOSI:
694   case TargetOpcode::G_FPTOUI: {
695     // FIXME: Support other types
696     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
697     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
698     if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
699       return UnableToLegalize;
700     LegalizeResult Status = conversionLibcall(
701         MI, MIRBuilder,
702         ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
703         FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
704     if (Status != Legalized)
705       return Status;
706     break;
707   }
708   case TargetOpcode::G_SITOFP:
709   case TargetOpcode::G_UITOFP: {
710     // FIXME: Support other types
711     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
712     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
713     if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
714       return UnableToLegalize;
715     LegalizeResult Status = conversionLibcall(
716         MI, MIRBuilder,
717         ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
718         FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
719     if (Status != Legalized)
720       return Status;
721     break;
722   }
723   }
724 
725   MI.eraseFromParent();
726   return Legalized;
727 }
728 
729 LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
730                                                               unsigned TypeIdx,
731                                                               LLT NarrowTy) {
732   uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
733   uint64_t NarrowSize = NarrowTy.getSizeInBits();
734 
735   switch (MI.getOpcode()) {
736   default:
737     return UnableToLegalize;
738   case TargetOpcode::G_IMPLICIT_DEF: {
739     Register DstReg = MI.getOperand(0).getReg();
740     LLT DstTy = MRI.getType(DstReg);
741 
742     // If SizeOp0 is not an exact multiple of NarrowSize, emit
743     // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
744     // FIXME: Although this would also be legal for the general case, it causes
745     //  a lot of regressions in the emitted code (superfluous COPYs, artifact
746     //  combines not being hit). This seems to be a problem related to the
747     //  artifact combiner.
748     if (SizeOp0 % NarrowSize != 0) {
749       LLT ImplicitTy = NarrowTy;
750       if (DstTy.isVector())
751         ImplicitTy = LLT::vector(DstTy.getNumElements(), ImplicitTy);
752 
753       Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
754       MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
755 
756       MI.eraseFromParent();
757       return Legalized;
758     }
759 
760     int NumParts = SizeOp0 / NarrowSize;
761 
762     SmallVector<Register, 2> DstRegs;
763     for (int i = 0; i < NumParts; ++i)
764       DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
765 
766     if (DstTy.isVector())
767       MIRBuilder.buildBuildVector(DstReg, DstRegs);
768     else
769       MIRBuilder.buildMerge(DstReg, DstRegs);
770     MI.eraseFromParent();
771     return Legalized;
772   }
773   case TargetOpcode::G_CONSTANT: {
774     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
775     const APInt &Val = MI.getOperand(1).getCImm()->getValue();
776     unsigned TotalSize = Ty.getSizeInBits();
777     unsigned NarrowSize = NarrowTy.getSizeInBits();
778     int NumParts = TotalSize / NarrowSize;
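    // For example (illustrative), an s96 constant narrowed with s64 produces
    // one s64 piece here plus an s32 leftover piece below.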
779 
780     SmallVector<Register, 4> PartRegs;
781     for (int I = 0; I != NumParts; ++I) {
782       unsigned Offset = I * NarrowSize;
783       auto K = MIRBuilder.buildConstant(NarrowTy,
784                                         Val.lshr(Offset).trunc(NarrowSize));
785       PartRegs.push_back(K.getReg(0));
786     }
787 
788     LLT LeftoverTy;
789     unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
790     SmallVector<Register, 1> LeftoverRegs;
791     if (LeftoverBits != 0) {
792       LeftoverTy = LLT::scalar(LeftoverBits);
793       auto K = MIRBuilder.buildConstant(
794         LeftoverTy,
795         Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
796       LeftoverRegs.push_back(K.getReg(0));
797     }
798 
799     insertParts(MI.getOperand(0).getReg(),
800                 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
801 
802     MI.eraseFromParent();
803     return Legalized;
804   }
805   case TargetOpcode::G_SEXT:
806   case TargetOpcode::G_ZEXT:
807   case TargetOpcode::G_ANYEXT:
808     return narrowScalarExt(MI, TypeIdx, NarrowTy);
809   case TargetOpcode::G_TRUNC: {
810     if (TypeIdx != 1)
811       return UnableToLegalize;
812 
813     uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
814     if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
815       LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
816       return UnableToLegalize;
817     }
818 
819     auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
820     MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
821     MI.eraseFromParent();
822     return Legalized;
823   }
824 
825   case TargetOpcode::G_FREEZE:
826     return reduceOperationWidth(MI, TypeIdx, NarrowTy);
827 
828   case TargetOpcode::G_ADD: {
829     // FIXME: add support for when SizeOp0 isn't an exact multiple of
830     // NarrowSize.
831     if (SizeOp0 % NarrowSize != 0)
832       return UnableToLegalize;
    // Expand in terms of carry-setting/consuming G_UADDO/G_UADDE instructions.
834     int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
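    // For example (illustrative), an s128 G_ADD narrowed with s32 becomes one
    // G_UADDO followed by three G_UADDEs chained through s1 carries.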
835 
836     SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
837     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
838     extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
839 
840     Register CarryIn;
841     for (int i = 0; i < NumParts; ++i) {
842       Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
843       Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
844 
845       if (i == 0)
846         MIRBuilder.buildUAddo(DstReg, CarryOut, Src1Regs[i], Src2Regs[i]);
847       else {
848         MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
849                               Src2Regs[i], CarryIn);
850       }
851 
852       DstRegs.push_back(DstReg);
853       CarryIn = CarryOut;
854     }
855     Register DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
857       MIRBuilder.buildBuildVector(DstReg, DstRegs);
858     else
859       MIRBuilder.buildMerge(DstReg, DstRegs);
860     MI.eraseFromParent();
861     return Legalized;
862   }
863   case TargetOpcode::G_SUB: {
864     // FIXME: add support for when SizeOp0 isn't an exact multiple of
865     // NarrowSize.
866     if (SizeOp0 % NarrowSize != 0)
867       return UnableToLegalize;
868 
869     int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
870 
871     SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
872     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
873     extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
874 
875     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
876     Register BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
877     MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut},
878                           {Src1Regs[0], Src2Regs[0]});
879     DstRegs.push_back(DstReg);
880     Register BorrowIn = BorrowOut;
881     for (int i = 1; i < NumParts; ++i) {
882       DstReg = MRI.createGenericVirtualRegister(NarrowTy);
883       BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
884 
885       MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut},
886                             {Src1Regs[i], Src2Regs[i], BorrowIn});
887 
888       DstRegs.push_back(DstReg);
889       BorrowIn = BorrowOut;
890     }
891     MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
892     MI.eraseFromParent();
893     return Legalized;
894   }
895   case TargetOpcode::G_MUL:
896   case TargetOpcode::G_UMULH:
897     return narrowScalarMul(MI, NarrowTy);
898   case TargetOpcode::G_EXTRACT:
899     return narrowScalarExtract(MI, TypeIdx, NarrowTy);
900   case TargetOpcode::G_INSERT:
901     return narrowScalarInsert(MI, TypeIdx, NarrowTy);
902   case TargetOpcode::G_LOAD: {
903     const auto &MMO = **MI.memoperands_begin();
904     Register DstReg = MI.getOperand(0).getReg();
905     LLT DstTy = MRI.getType(DstReg);
906     if (DstTy.isVector())
907       return UnableToLegalize;
908 
909     if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
910       Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
911       auto &MMO = **MI.memoperands_begin();
912       MIRBuilder.buildLoad(TmpReg, MI.getOperand(1), MMO);
913       MIRBuilder.buildAnyExt(DstReg, TmpReg);
914       MI.eraseFromParent();
915       return Legalized;
916     }
917 
918     return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
919   }
920   case TargetOpcode::G_ZEXTLOAD:
921   case TargetOpcode::G_SEXTLOAD: {
922     bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
923     Register DstReg = MI.getOperand(0).getReg();
924     Register PtrReg = MI.getOperand(1).getReg();
925 
926     Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
927     auto &MMO = **MI.memoperands_begin();
928     if (MMO.getSizeInBits() == NarrowSize) {
929       MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
930     } else {
931       MIRBuilder.buildLoadInstr(MI.getOpcode(), TmpReg, PtrReg, MMO);
932     }
933 
934     if (ZExt)
935       MIRBuilder.buildZExt(DstReg, TmpReg);
936     else
937       MIRBuilder.buildSExt(DstReg, TmpReg);
938 
939     MI.eraseFromParent();
940     return Legalized;
941   }
942   case TargetOpcode::G_STORE: {
943     const auto &MMO = **MI.memoperands_begin();
944 
945     Register SrcReg = MI.getOperand(0).getReg();
946     LLT SrcTy = MRI.getType(SrcReg);
947     if (SrcTy.isVector())
948       return UnableToLegalize;
949 
950     int NumParts = SizeOp0 / NarrowSize;
951     unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
952     unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
953     if (SrcTy.isVector() && LeftoverBits != 0)
954       return UnableToLegalize;
955 
956     if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
957       Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
958       auto &MMO = **MI.memoperands_begin();
959       MIRBuilder.buildTrunc(TmpReg, SrcReg);
960       MIRBuilder.buildStore(TmpReg, MI.getOperand(1), MMO);
961       MI.eraseFromParent();
962       return Legalized;
963     }
964 
965     return reduceLoadStoreWidth(MI, 0, NarrowTy);
966   }
967   case TargetOpcode::G_SELECT:
968     return narrowScalarSelect(MI, TypeIdx, NarrowTy);
969   case TargetOpcode::G_AND:
970   case TargetOpcode::G_OR:
971   case TargetOpcode::G_XOR: {
972     // Legalize bitwise operation:
973     // A = BinOp<Ty> B, C
974     // into:
975     // B1, ..., BN = G_UNMERGE_VALUES B
976     // C1, ..., CN = G_UNMERGE_VALUES C
    // A1 = BinOp<Ty/N> B1, C1
978     // ...
979     // AN = BinOp<Ty/N> BN, CN
980     // A = G_MERGE_VALUES A1, ..., AN
981     return narrowScalarBasic(MI, TypeIdx, NarrowTy);
982   }
983   case TargetOpcode::G_SHL:
984   case TargetOpcode::G_LSHR:
985   case TargetOpcode::G_ASHR:
986     return narrowScalarShift(MI, TypeIdx, NarrowTy);
987   case TargetOpcode::G_CTLZ:
988   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
989   case TargetOpcode::G_CTTZ:
990   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
991   case TargetOpcode::G_CTPOP:
992     if (TypeIdx == 1)
993       switch (MI.getOpcode()) {
994       case TargetOpcode::G_CTLZ:
995       case TargetOpcode::G_CTLZ_ZERO_UNDEF:
996         return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
997       case TargetOpcode::G_CTTZ:
998       case TargetOpcode::G_CTTZ_ZERO_UNDEF:
999         return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1000       case TargetOpcode::G_CTPOP:
1001         return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1002       default:
1003         return UnableToLegalize;
1004       }
1005 
1006     Observer.changingInstr(MI);
1007     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1008     Observer.changedInstr(MI);
1009     return Legalized;
1010   case TargetOpcode::G_INTTOPTR:
1011     if (TypeIdx != 1)
1012       return UnableToLegalize;
1013 
1014     Observer.changingInstr(MI);
1015     narrowScalarSrc(MI, NarrowTy, 1);
1016     Observer.changedInstr(MI);
1017     return Legalized;
1018   case TargetOpcode::G_PTRTOINT:
1019     if (TypeIdx != 0)
1020       return UnableToLegalize;
1021 
1022     Observer.changingInstr(MI);
1023     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1024     Observer.changedInstr(MI);
1025     return Legalized;
1026   case TargetOpcode::G_PHI: {
1027     unsigned NumParts = SizeOp0 / NarrowSize;
1028     SmallVector<Register, 2> DstRegs(NumParts);
1029     SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1030     Observer.changingInstr(MI);
1031     for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1032       MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1033       MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
1034       extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1035                    SrcRegs[i / 2]);
1036     }
1037     MachineBasicBlock &MBB = *MI.getParent();
1038     MIRBuilder.setInsertPt(MBB, MI);
1039     for (unsigned i = 0; i < NumParts; ++i) {
1040       DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1041       MachineInstrBuilder MIB =
1042           MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1043       for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1044         MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1045     }
1046     MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1047     MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
1048     Observer.changedInstr(MI);
1049     MI.eraseFromParent();
1050     return Legalized;
1051   }
1052   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1053   case TargetOpcode::G_INSERT_VECTOR_ELT: {
1054     if (TypeIdx != 2)
1055       return UnableToLegalize;
1056 
1057     int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1058     Observer.changingInstr(MI);
1059     narrowScalarSrc(MI, NarrowTy, OpIdx);
1060     Observer.changedInstr(MI);
1061     return Legalized;
1062   }
1063   case TargetOpcode::G_ICMP: {
1064     uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
1065     if (NarrowSize * 2 != SrcSize)
1066       return UnableToLegalize;
1067 
1068     Observer.changingInstr(MI);
1069     Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
1070     Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
1071     MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2));
1072 
1073     Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
1074     Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
1075     MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3));
1076 
1077     CmpInst::Predicate Pred =
1078         static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1079     LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
1080 
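    // For EQ/NE, compare (LHSL ^ RHSL) | (LHSH ^ RHSH) against zero. For the
    // other predicates, compare the high halves, and fall back to an unsigned
    // compare of the low halves when the high halves are equal.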
1081     if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
1082       MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
1083       MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
1084       MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
1085       MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1086       MIRBuilder.buildICmp(Pred, MI.getOperand(0), Or, Zero);
1087     } else {
1088       MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
1089       MachineInstrBuilder CmpHEQ =
1090           MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
1091       MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
1092           ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
1093       MIRBuilder.buildSelect(MI.getOperand(0), CmpHEQ, CmpLU, CmpH);
1094     }
1095     Observer.changedInstr(MI);
1096     MI.eraseFromParent();
1097     return Legalized;
1098   }
1099   case TargetOpcode::G_SEXT_INREG: {
1100     if (TypeIdx != 0)
1101       return UnableToLegalize;
1102 
1103     int64_t SizeInBits = MI.getOperand(2).getImm();
1104 
1105     // So long as the new type has more bits than the bits we're extending we
1106     // don't need to break it apart.
1107     if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1108       Observer.changingInstr(MI);
1109       // We don't lose any non-extension bits by truncating the src and
1110       // sign-extending the dst.
1111       MachineOperand &MO1 = MI.getOperand(1);
1112       auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1113       MO1.setReg(TruncMIB.getReg(0));
1114 
1115       MachineOperand &MO2 = MI.getOperand(0);
1116       Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1117       MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1118       MIRBuilder.buildSExt(MO2, DstExt);
1119       MO2.setReg(DstExt);
1120       Observer.changedInstr(MI);
1121       return Legalized;
1122     }
1123 
1124     // Break it apart. Components below the extension point are unmodified. The
1125     // component containing the extension point becomes a narrower SEXT_INREG.
1126     // Components above it are ashr'd from the component containing the
1127     // extension point.
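    // For example (illustrative), narrowing G_SEXT_INREG %x(s128), 35 with s32
    // parts keeps part 0 unchanged, turns part 1 into a G_SEXT_INREG of width
    // 3, and fills parts 2 and 3 with an ashr of part 1 by 31 (the second of
    // which reuses the first).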
1128     if (SizeOp0 % NarrowSize != 0)
1129       return UnableToLegalize;
1130     int NumParts = SizeOp0 / NarrowSize;
1131 
1132     // List the registers where the destination will be scattered.
1133     SmallVector<Register, 2> DstRegs;
1134     // List the registers where the source will be split.
1135     SmallVector<Register, 2> SrcRegs;
1136 
1137     // Create all the temporary registers.
1138     for (int i = 0; i < NumParts; ++i) {
1139       Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1140 
1141       SrcRegs.push_back(SrcReg);
1142     }
1143 
1144     // Explode the big arguments into smaller chunks.
1145     MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
1146 
1147     Register AshrCstReg =
1148         MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1149             .getReg(0);
1150     Register FullExtensionReg = 0;
1151     Register PartialExtensionReg = 0;
1152 
1153     // Do the operation on each small part.
1154     for (int i = 0; i < NumParts; ++i) {
1155       if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
1156         DstRegs.push_back(SrcRegs[i]);
1157       else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
1158         assert(PartialExtensionReg &&
1159                "Expected to visit partial extension before full");
1160         if (FullExtensionReg) {
1161           DstRegs.push_back(FullExtensionReg);
1162           continue;
1163         }
1164         DstRegs.push_back(
1165             MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1166                 .getReg(0));
1167         FullExtensionReg = DstRegs.back();
1168       } else {
1169         DstRegs.push_back(
1170             MIRBuilder
1171                 .buildInstr(
1172                     TargetOpcode::G_SEXT_INREG, {NarrowTy},
1173                     {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1174                 .getReg(0));
1175         PartialExtensionReg = DstRegs.back();
1176       }
1177     }
1178 
1179     // Gather the destination registers into the final destination.
1180     Register DstReg = MI.getOperand(0).getReg();
1181     MIRBuilder.buildMerge(DstReg, DstRegs);
1182     MI.eraseFromParent();
1183     return Legalized;
1184   }
1185   case TargetOpcode::G_BSWAP:
1186   case TargetOpcode::G_BITREVERSE: {
1187     if (SizeOp0 % NarrowSize != 0)
1188       return UnableToLegalize;
1189 
1190     Observer.changingInstr(MI);
1191     SmallVector<Register, 2> SrcRegs, DstRegs;
1192     unsigned NumParts = SizeOp0 / NarrowSize;
1193     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
1194 
1195     for (unsigned i = 0; i < NumParts; ++i) {
1196       auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1197                                            {SrcRegs[NumParts - 1 - i]});
1198       DstRegs.push_back(DstPart.getReg(0));
1199     }
1200 
1201     MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
1202 
1203     Observer.changedInstr(MI);
1204     MI.eraseFromParent();
1205     return Legalized;
1206   }
1207   case TargetOpcode::G_PTR_ADD:
1208   case TargetOpcode::G_PTRMASK: {
1209     if (TypeIdx != 1)
1210       return UnableToLegalize;
1211     Observer.changingInstr(MI);
1212     narrowScalarSrc(MI, NarrowTy, 2);
1213     Observer.changedInstr(MI);
1214     return Legalized;
1215   }
1216   case TargetOpcode::G_FPTOUI: {
1217     if (TypeIdx != 0)
1218       return UnableToLegalize;
1219     Observer.changingInstr(MI);
1220     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1221     Observer.changedInstr(MI);
1222     return Legalized;
1223   }
1224   case TargetOpcode::G_FPTOSI: {
1225     if (TypeIdx != 0)
1226       return UnableToLegalize;
1227     Observer.changingInstr(MI);
1228     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_SEXT);
1229     Observer.changedInstr(MI);
1230     return Legalized;
1231   }
1232   case TargetOpcode::G_FPEXT:
1233     if (TypeIdx != 0)
1234       return UnableToLegalize;
1235     Observer.changingInstr(MI);
1236     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
1237     Observer.changedInstr(MI);
1238     return Legalized;
1239   }
1240 }
1241 
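// Return \p Val as a value of scalar type of the same total width, inserting a
// G_PTRTOINT and/or G_BITCAST as needed. Returns an invalid Register for
// pointers in non-integral address spaces.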
1242 Register LegalizerHelper::coerceToScalar(Register Val) {
1243   LLT Ty = MRI.getType(Val);
1244   if (Ty.isScalar())
1245     return Val;
1246 
1247   const DataLayout &DL = MIRBuilder.getDataLayout();
1248   LLT NewTy = LLT::scalar(Ty.getSizeInBits());
1249   if (Ty.isPointer()) {
1250     if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
1251       return Register();
1252     return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
1253   }
1254 
1255   Register NewVal = Val;
1256 
1257   assert(Ty.isVector());
1258   LLT EltTy = Ty.getElementType();
1259   if (EltTy.isPointer())
1260     NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
1261   return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
1262 }
1263 
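// Widen operand \p OpIdx of \p MI to \p WideTy by inserting an \p ExtOpcode
// instruction and rewriting the operand to use its result.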
1264 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
1265                                      unsigned OpIdx, unsigned ExtOpcode) {
1266   MachineOperand &MO = MI.getOperand(OpIdx);
1267   auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
1268   MO.setReg(ExtB.getReg(0));
1269 }
1270 
1271 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
1272                                       unsigned OpIdx) {
1273   MachineOperand &MO = MI.getOperand(OpIdx);
1274   auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
1275   MO.setReg(ExtB.getReg(0));
1276 }
1277 
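// Rewrite the \p OpIdx def of \p MI to a fresh \p WideTy register and insert a
// \p TruncOpcode after \p MI to narrow it back to the original register.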
1278 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
1279                                      unsigned OpIdx, unsigned TruncOpcode) {
1280   MachineOperand &MO = MI.getOperand(OpIdx);
1281   Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1282   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1283   MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
1284   MO.setReg(DstExt);
1285 }
1286 
1287 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
1288                                       unsigned OpIdx, unsigned ExtOpcode) {
1289   MachineOperand &MO = MI.getOperand(OpIdx);
1290   Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
1291   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1292   MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
1293   MO.setReg(DstTrunc);
1294 }
1295 
1296 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
1297                                             unsigned OpIdx) {
1298   MachineOperand &MO = MI.getOperand(OpIdx);
1299   Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1300   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1301   MIRBuilder.buildExtract(MO, DstExt, 0);
1302   MO.setReg(DstExt);
1303 }
1304 
1305 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
1306                                             unsigned OpIdx) {
1307   MachineOperand &MO = MI.getOperand(OpIdx);
1308 
1309   LLT OldTy = MRI.getType(MO.getReg());
1310   unsigned OldElts = OldTy.getNumElements();
1311   unsigned NewElts = MoreTy.getNumElements();
1312 
1313   unsigned NumParts = NewElts / OldElts;
1314 
1315   // Use concat_vectors if the result is a multiple of the number of elements.
1316   if (NumParts * OldElts == NewElts) {
1317     SmallVector<Register, 8> Parts;
1318     Parts.push_back(MO.getReg());
1319 
1320     Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
1321     for (unsigned I = 1; I != NumParts; ++I)
1322       Parts.push_back(ImpDef);
1323 
1324     auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
1325     MO.setReg(Concat.getReg(0));
1326     return;
1327   }
1328 
1329   Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
1330   Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
1331   MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
1332   MO.setReg(MoreReg);
1333 }
1334 
1335 void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1336   MachineOperand &Op = MI.getOperand(OpIdx);
1337   Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
1338 }
1339 
1340 void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1341   MachineOperand &MO = MI.getOperand(OpIdx);
1342   Register CastDst = MRI.createGenericVirtualRegister(CastTy);
1343   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1344   MIRBuilder.buildBitcast(MO, CastDst);
1345   MO.setReg(CastDst);
1346 }
1347 
1348 LegalizerHelper::LegalizeResult
1349 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
1350                                         LLT WideTy) {
1351   if (TypeIdx != 1)
1352     return UnableToLegalize;
1353 
1354   Register DstReg = MI.getOperand(0).getReg();
1355   LLT DstTy = MRI.getType(DstReg);
1356   if (DstTy.isVector())
1357     return UnableToLegalize;
1358 
1359   Register Src1 = MI.getOperand(1).getReg();
1360   LLT SrcTy = MRI.getType(Src1);
1361   const int DstSize = DstTy.getSizeInBits();
1362   const int SrcSize = SrcTy.getSizeInBits();
1363   const int WideSize = WideTy.getSizeInBits();
1364   const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1365 
1366   unsigned NumOps = MI.getNumOperands();
1367   unsigned NumSrc = MI.getNumOperands() - 1;
1368   unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1369 
1370   if (WideSize >= DstSize) {
1371     // Directly pack the bits in the target type.
1372     Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
1373 
1374     for (unsigned I = 2; I != NumOps; ++I) {
1375       const unsigned Offset = (I - 1) * PartSize;
1376 
1377       Register SrcReg = MI.getOperand(I).getReg();
1378       assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
1379 
1380       auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
1381 
1382       Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
1383         MRI.createGenericVirtualRegister(WideTy);
1384 
1385       auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
1386       auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
1387       MIRBuilder.buildOr(NextResult, ResultReg, Shl);
1388       ResultReg = NextResult;
1389     }
1390 
1391     if (WideSize > DstSize)
1392       MIRBuilder.buildTrunc(DstReg, ResultReg);
1393     else if (DstTy.isPointer())
1394       MIRBuilder.buildIntToPtr(DstReg, ResultReg);
1395 
1396     MI.eraseFromParent();
1397     return Legalized;
1398   }
1399 
1400   // Unmerge the original values to the GCD type, and recombine to the next
1401   // multiple greater than the original type.
1402   //
1403   // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1404   // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1405   // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1406   // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1407   // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1408   // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1409   // %12:_(s12) = G_MERGE_VALUES %10, %11
1410   //
1411   // Padding with undef if necessary:
1412   //
1413   // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1414   // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1415   // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1416   // %7:_(s2) = G_IMPLICIT_DEF
1417   // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1418   // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1419   // %10:_(s12) = G_MERGE_VALUES %8, %9
1420 
1421   const int GCD = greatestCommonDivisor(SrcSize, WideSize);
1422   LLT GCDTy = LLT::scalar(GCD);
1423 
1424   SmallVector<Register, 8> Parts;
1425   SmallVector<Register, 8> NewMergeRegs;
1426   SmallVector<Register, 8> Unmerges;
1427   LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
1428 
1429   // Decompose the original operands if they don't evenly divide.
1430   for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
1431     Register SrcReg = MI.getOperand(I).getReg();
1432     if (GCD == SrcSize) {
1433       Unmerges.push_back(SrcReg);
1434     } else {
1435       auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
1436       for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1437         Unmerges.push_back(Unmerge.getReg(J));
1438     }
1439   }
1440 
1441   // Pad with undef to the next size that is a multiple of the requested size.
  const int TotalGCDParts = NumMerge * WideSize / GCD;
  if (static_cast<int>(Unmerges.size()) != TotalGCDParts) {
    Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    for (int I = Unmerges.size(); I != TotalGCDParts; ++I)
1445       Unmerges.push_back(UndefReg);
1446   }
1447 
1448   const int PartsPerGCD = WideSize / GCD;
1449 
1450   // Build merges of each piece.
1451   ArrayRef<Register> Slicer(Unmerges);
1452   for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1453     auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
1454     NewMergeRegs.push_back(Merge.getReg(0));
1455   }
1456 
1457   // A truncate may be necessary if the requested type doesn't evenly divide the
1458   // original result type.
1459   if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1460     MIRBuilder.buildMerge(DstReg, NewMergeRegs);
1461   } else {
1462     auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
1463     MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
1464   }
1465 
1466   MI.eraseFromParent();
1467   return Legalized;
1468 }
1469 
1470 LegalizerHelper::LegalizeResult
1471 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
1472                                           LLT WideTy) {
1473   if (TypeIdx != 0)
1474     return UnableToLegalize;
1475 
1476   int NumDst = MI.getNumOperands() - 1;
1477   Register SrcReg = MI.getOperand(NumDst).getReg();
1478   LLT SrcTy = MRI.getType(SrcReg);
1479   if (SrcTy.isVector())
1480     return UnableToLegalize;
1481 
1482   Register Dst0Reg = MI.getOperand(0).getReg();
1483   LLT DstTy = MRI.getType(Dst0Reg);
1484   if (!DstTy.isScalar())
1485     return UnableToLegalize;
1486 
1487   if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
1488     if (SrcTy.isPointer()) {
1489       const DataLayout &DL = MIRBuilder.getDataLayout();
1490       if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
1491         LLVM_DEBUG(
1492             dbgs() << "Not casting non-integral address space integer\n");
1493         return UnableToLegalize;
1494       }
1495 
1496       SrcTy = LLT::scalar(SrcTy.getSizeInBits());
1497       SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
1498     }
1499 
1500     // Widen SrcTy to WideTy. This does not affect the result, but since the
1501     // user requested this size, it is probably better handled than SrcTy and
1502     // should reduce the total number of legalization artifacts
1503     if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1504       SrcTy = WideTy;
1505       SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
1506     }
1507 
    // There's no unmerge type to target. Directly extract the bits from the
    // source type.
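    // E.g. unmerging an s16 into two s8 pieces with the source widened to s32
    // (illustrative register numbers):
    //   %1:_(s8), %2:_(s8) = G_UNMERGE_VALUES %0:_(s16)
    // =>
    //   %3:_(s32) = G_ANYEXT %0
    //   %1:_(s8) = G_TRUNC %3
    //   %4:_(s32) = G_CONSTANT i32 8
    //   %5:_(s32) = G_LSHR %3, %4
    //   %2:_(s8) = G_TRUNC %5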
1510     unsigned DstSize = DstTy.getSizeInBits();
1511 
1512     MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
1513     for (int I = 1; I != NumDst; ++I) {
1514       auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
1515       auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
1516       MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
1517     }
1518 
1519     MI.eraseFromParent();
1520     return Legalized;
1521   }
1522 
1523   // Extend the source to a wider type.
1524   LLT LCMTy = getLCMType(SrcTy, WideTy);
1525 
1526   Register WideSrc = SrcReg;
1527   if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
1528     // TODO: If this is an integral address space, cast to integer and anyext.
1529     if (SrcTy.isPointer()) {
1530       LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
1531       return UnableToLegalize;
1532     }
1533 
1534     WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
1535   }
1536 
1537   auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
1538 
  // Create a sequence of unmerges to the original results. Since we may have
  // widened the source, we will need to pad the results with dead defs to
  // cover the source register.
1542   // e.g. widen s16 to s32:
1543   // %1:_(s16), %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0:_(s48)
1544   //
1545   // =>
1546   //  %4:_(s64) = G_ANYEXT %0:_(s48)
1547   //  %5:_(s32), %6:_(s32) = G_UNMERGE_VALUES %4 ; Requested unmerge
1548   //  %1:_(s16), %2:_(s16) = G_UNMERGE_VALUES %5 ; unpack to original regs
1549   //  %3:_(s16), dead %7 = G_UNMERGE_VALUES %6 ; original reg + extra dead def
1550 
1551   const int NumUnmerge = Unmerge->getNumOperands() - 1;
1552   const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
1553 
1554   for (int I = 0; I != NumUnmerge; ++I) {
1555     auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
1556 
1557     for (int J = 0; J != PartsPerUnmerge; ++J) {
1558       int Idx = I * PartsPerUnmerge + J;
1559       if (Idx < NumDst)
1560         MIB.addDef(MI.getOperand(Idx).getReg());
1561       else {
1562         // Create dead def for excess components.
1563         MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
1564       }
1565     }
1566 
1567     MIB.addUse(Unmerge.getReg(I));
1568   }
1569 
1570   MI.eraseFromParent();
1571   return Legalized;
1572 }
1573 
1574 LegalizerHelper::LegalizeResult
1575 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
1576                                     LLT WideTy) {
1577   Register DstReg = MI.getOperand(0).getReg();
1578   Register SrcReg = MI.getOperand(1).getReg();
1579   LLT SrcTy = MRI.getType(SrcReg);
1580 
1581   LLT DstTy = MRI.getType(DstReg);
1582   unsigned Offset = MI.getOperand(2).getImm();
1583 
1584   if (TypeIdx == 0) {
1585     if (SrcTy.isVector() || DstTy.isVector())
1586       return UnableToLegalize;
1587 
1588     SrcOp Src(SrcReg);
1589     if (SrcTy.isPointer()) {
1590       // Extracts from pointers can be handled only if they are really just
1591       // simple integers.
1592       const DataLayout &DL = MIRBuilder.getDataLayout();
1593       if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
1594         return UnableToLegalize;
1595 
1596       LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
1597       Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
1598       SrcTy = SrcAsIntTy;
1599     }
1600 
1601     if (DstTy.isPointer())
1602       return UnableToLegalize;
1603 
1604     if (Offset == 0) {
1605       // Avoid a shift in the degenerate case.
1606       MIRBuilder.buildTrunc(DstReg,
1607                             MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
1608       MI.eraseFromParent();
1609       return Legalized;
1610     }
1611 
1612     // Do a shift in the source type.
1613     LLT ShiftTy = SrcTy;
1614     if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1615       Src = MIRBuilder.buildAnyExt(WideTy, Src);
1616       ShiftTy = WideTy;
    }
1619 
1620     auto LShr = MIRBuilder.buildLShr(
1621       ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
1622     MIRBuilder.buildTrunc(DstReg, LShr);
1623     MI.eraseFromParent();
1624     return Legalized;
1625   }
1626 
1627   if (SrcTy.isScalar()) {
1628     Observer.changingInstr(MI);
1629     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1630     Observer.changedInstr(MI);
1631     return Legalized;
1632   }
1633 
1634   if (!SrcTy.isVector())
1635     return UnableToLegalize;
1636 
1637   if (DstTy != SrcTy.getElementType())
1638     return UnableToLegalize;
1639 
1640   if (Offset % SrcTy.getScalarSizeInBits() != 0)
1641     return UnableToLegalize;
1642 
1643   Observer.changingInstr(MI);
1644   widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1645 
1646   MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
1647                           Offset);
1648   widenScalarDst(MI, WideTy.getScalarType(), 0);
1649   Observer.changedInstr(MI);
1650   return Legalized;
1651 }
1652 
1653 LegalizerHelper::LegalizeResult
1654 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
1655                                    LLT WideTy) {
1656   if (TypeIdx != 0 || WideTy.isVector())
1657     return UnableToLegalize;
1658   Observer.changingInstr(MI);
1659   widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1660   widenScalarDst(MI, WideTy);
1661   Observer.changedInstr(MI);
1662   return Legalized;
1663 }
1664 
1665 LegalizerHelper::LegalizeResult
1666 LegalizerHelper::widenScalarAddSubSat(MachineInstr &MI, unsigned TypeIdx,
1667                                       LLT WideTy) {
1668   bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
1669                   MI.getOpcode() == TargetOpcode::G_SSUBSAT;
1670   // We can convert this to:
1671   //   1. Any extend iN to iM
1672   //   2. SHL by M-N
1673   //   3. [US][ADD|SUB]SAT
1674   //   4. L/ASHR by M-N
1675   //
  // It may be more efficient to lower this to a min and a max operation in
  // the wider type if the promoted operation isn't legal, but this decision
  // is up to the target's lowering request.
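  //
  // E.g. widening G_SADDSAT from s8 to s32 (illustrative register numbers):
  //   %2:_(s8) = G_SADDSAT %0:_(s8), %1:_(s8)
  // =>
  //   %3:_(s32) = G_ANYEXT %0
  //   %4:_(s32) = G_ANYEXT %1
  //   %5:_(s32) = G_CONSTANT i32 24
  //   %6:_(s32) = G_SHL %3, %5
  //   %7:_(s32) = G_SHL %4, %5
  //   %8:_(s32) = G_SADDSAT %6, %7
  //   %9:_(s32) = G_ASHR %8, %5
  //   %2:_(s8) = G_TRUNC %9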
1679   Register DstReg = MI.getOperand(0).getReg();
1680 
1681   unsigned NewBits = WideTy.getScalarSizeInBits();
1682   unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
1683 
1684   auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
1685   auto RHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
1686   auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
1687   auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
1688   auto ShiftR = MIRBuilder.buildShl(WideTy, RHS, ShiftK);
1689 
1690   auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
1691                                         {ShiftL, ShiftR}, MI.getFlags());
1692 
1693   // Use a shift that will preserve the number of sign bits when the trunc is
1694   // folded away.
1695   auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
1696                          : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
1697 
1698   MIRBuilder.buildTrunc(DstReg, Result);
1699   MI.eraseFromParent();
1700   return Legalized;
1701 }
1702 
1703 LegalizerHelper::LegalizeResult
1704 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
1705   switch (MI.getOpcode()) {
1706   default:
1707     return UnableToLegalize;
1708   case TargetOpcode::G_EXTRACT:
1709     return widenScalarExtract(MI, TypeIdx, WideTy);
1710   case TargetOpcode::G_INSERT:
1711     return widenScalarInsert(MI, TypeIdx, WideTy);
1712   case TargetOpcode::G_MERGE_VALUES:
1713     return widenScalarMergeValues(MI, TypeIdx, WideTy);
1714   case TargetOpcode::G_UNMERGE_VALUES:
1715     return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
1716   case TargetOpcode::G_UADDO:
1717   case TargetOpcode::G_USUBO: {
1718     if (TypeIdx == 1)
1719       return UnableToLegalize; // TODO
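    // Zero-extend the inputs, do the arithmetic at the wider width, and
    // recover the overflow bit by comparing against the low-bits-masked
    // result. E.g. for s8 operands widened to s32 (illustrative):
    //   %2:_(s8), %3:_(s1) = G_UADDO %0:_(s8), %1:_(s8)
    // =>
    //   %4:_(s32) = G_ZEXT %0
    //   %5:_(s32) = G_ZEXT %1
    //   %6:_(s32) = G_ADD %4, %5
    //   %7:_(s32) = G_CONSTANT i32 255
    //   %8:_(s32) = G_AND %6, %7
    //   %3:_(s1) = G_ICMP intpred(ne), %6, %8
    //   %2:_(s8) = G_TRUNC %6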
1720     auto LHSZext = MIRBuilder.buildZExt(WideTy, MI.getOperand(2));
1721     auto RHSZext = MIRBuilder.buildZExt(WideTy, MI.getOperand(3));
1722     unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
1723                           ? TargetOpcode::G_ADD
1724                           : TargetOpcode::G_SUB;
1725     // Do the arithmetic in the larger type.
1726     auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
1727     LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
1728     APInt Mask =
1729         APInt::getLowBitsSet(WideTy.getSizeInBits(), OrigTy.getSizeInBits());
1730     auto AndOp = MIRBuilder.buildAnd(
1731         WideTy, NewOp, MIRBuilder.buildConstant(WideTy, Mask));
1732     // There is no overflow if the AndOp is the same as NewOp.
1733     MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, AndOp);
1734     // Now trunc the NewOp to the original result.
1735     MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
1736     MI.eraseFromParent();
1737     return Legalized;
1738   }
1739   case TargetOpcode::G_SADDSAT:
1740   case TargetOpcode::G_SSUBSAT:
1741   case TargetOpcode::G_UADDSAT:
1742   case TargetOpcode::G_USUBSAT:
1743     return widenScalarAddSubSat(MI, TypeIdx, WideTy);
1744   case TargetOpcode::G_CTTZ:
1745   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1746   case TargetOpcode::G_CTLZ:
1747   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1748   case TargetOpcode::G_CTPOP: {
1749     if (TypeIdx == 0) {
1750       Observer.changingInstr(MI);
1751       widenScalarDst(MI, WideTy, 0);
1752       Observer.changedInstr(MI);
1753       return Legalized;
1754     }
1755 
1756     Register SrcReg = MI.getOperand(1).getReg();
1757 
1758     // First ZEXT the input.
1759     auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
1760     LLT CurTy = MRI.getType(SrcReg);
1761     if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
1762       // The count is the same in the larger type except if the original
1763       // value was zero.  This can be handled by setting the bit just off
1764       // the top of the original type.
1765       auto TopBit =
1766           APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
1767       MIBSrc = MIRBuilder.buildOr(
1768         WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
1769     }
1770 
1771     // Perform the operation at the larger size.
1772     auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
    // This is already the correct result for CTPOP and the CTTZ opcodes.
1774     if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
1775         MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
1776       // The correct result is NewOp - (Difference in widety and current ty).
1777       unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
1778       MIBNewOp = MIRBuilder.buildSub(
1779           WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
1780     }
1781 
1782     MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
1783     MI.eraseFromParent();
1784     return Legalized;
1785   }
1786   case TargetOpcode::G_BSWAP: {
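    // Byte-swapping at the wider width leaves the interesting bytes in the
    // high part of the result, so shift them back down before truncating.
    // E.g. for s16 widened to s32 (illustrative register numbers):
    //   %1:_(s16) = G_BSWAP %0:_(s16)
    // =>
    //   %2:_(s32) = G_ANYEXT %0
    //   %3:_(s32) = G_BSWAP %2
    //   %4:_(s32) = G_CONSTANT i32 16
    //   %5:_(s32) = G_LSHR %3, %4
    //   %1:_(s16) = G_TRUNC %5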
1787     Observer.changingInstr(MI);
1788     Register DstReg = MI.getOperand(0).getReg();
1789 
1790     Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
1791     Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1792     Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
1793     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1794 
1795     MI.getOperand(0).setReg(DstExt);
1796 
1797     MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1798 
1799     LLT Ty = MRI.getType(DstReg);
1800     unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
1801     MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
1802     MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
1803 
1804     MIRBuilder.buildTrunc(DstReg, ShrReg);
1805     Observer.changedInstr(MI);
1806     return Legalized;
1807   }
1808   case TargetOpcode::G_BITREVERSE: {
1809     Observer.changingInstr(MI);
1810 
1811     Register DstReg = MI.getOperand(0).getReg();
1812     LLT Ty = MRI.getType(DstReg);
1813     unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
1814 
1815     Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1816     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1817     MI.getOperand(0).setReg(DstExt);
1818     MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1819 
1820     auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
1821     auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
1822     MIRBuilder.buildTrunc(DstReg, Shift);
1823     Observer.changedInstr(MI);
1824     return Legalized;
1825   }
1826   case TargetOpcode::G_FREEZE:
1827     Observer.changingInstr(MI);
1828     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1829     widenScalarDst(MI, WideTy);
1830     Observer.changedInstr(MI);
1831     return Legalized;
1832 
1833   case TargetOpcode::G_ADD:
1834   case TargetOpcode::G_AND:
1835   case TargetOpcode::G_MUL:
1836   case TargetOpcode::G_OR:
1837   case TargetOpcode::G_XOR:
1838   case TargetOpcode::G_SUB:
    // Perform operation at larger width (any extension is fine here, high bits
    // don't affect the result) and then truncate the result back to the
    // original type.
1842     Observer.changingInstr(MI);
1843     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1844     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1845     widenScalarDst(MI, WideTy);
1846     Observer.changedInstr(MI);
1847     return Legalized;
1848 
1849   case TargetOpcode::G_SHL:
1850     Observer.changingInstr(MI);
1851 
1852     if (TypeIdx == 0) {
1853       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1854       widenScalarDst(MI, WideTy);
1855     } else {
1856       assert(TypeIdx == 1);
1857       // The "number of bits to shift" operand must preserve its value as an
1858       // unsigned integer:
1859       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1860     }
1861 
1862     Observer.changedInstr(MI);
1863     return Legalized;
1864 
1865   case TargetOpcode::G_SDIV:
1866   case TargetOpcode::G_SREM:
1867   case TargetOpcode::G_SMIN:
1868   case TargetOpcode::G_SMAX:
1869     Observer.changingInstr(MI);
1870     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
1871     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1872     widenScalarDst(MI, WideTy);
1873     Observer.changedInstr(MI);
1874     return Legalized;
1875 
1876   case TargetOpcode::G_ASHR:
1877   case TargetOpcode::G_LSHR:
1878     Observer.changingInstr(MI);
1879 
1880     if (TypeIdx == 0) {
1881       unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
1882         TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
1883 
1884       widenScalarSrc(MI, WideTy, 1, CvtOp);
1885       widenScalarDst(MI, WideTy);
1886     } else {
1887       assert(TypeIdx == 1);
1888       // The "number of bits to shift" operand must preserve its value as an
1889       // unsigned integer:
1890       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1891     }
1892 
1893     Observer.changedInstr(MI);
1894     return Legalized;
1895   case TargetOpcode::G_UDIV:
1896   case TargetOpcode::G_UREM:
1897   case TargetOpcode::G_UMIN:
1898   case TargetOpcode::G_UMAX:
1899     Observer.changingInstr(MI);
1900     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1901     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1902     widenScalarDst(MI, WideTy);
1903     Observer.changedInstr(MI);
1904     return Legalized;
1905 
1906   case TargetOpcode::G_SELECT:
1907     Observer.changingInstr(MI);
1908     if (TypeIdx == 0) {
1909       // Perform operation at larger width (any extension is fine here, high
1910       // bits don't affect the result) and then truncate the result back to the
1911       // original type.
1912       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1913       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
1914       widenScalarDst(MI, WideTy);
1915     } else {
1916       bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
1917       // Explicit extension is required here since high bits affect the result.
1918       widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
1919     }
1920     Observer.changedInstr(MI);
1921     return Legalized;
1922 
1923   case TargetOpcode::G_FPTOSI:
1924   case TargetOpcode::G_FPTOUI:
1925     Observer.changingInstr(MI);
1926 
1927     if (TypeIdx == 0)
1928       widenScalarDst(MI, WideTy);
1929     else
1930       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
1931 
1932     Observer.changedInstr(MI);
1933     return Legalized;
1934   case TargetOpcode::G_SITOFP:
1935     Observer.changingInstr(MI);
1936 
1937     if (TypeIdx == 0)
1938       widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
1939     else
1940       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
1941 
1942     Observer.changedInstr(MI);
1943     return Legalized;
1944   case TargetOpcode::G_UITOFP:
1945     Observer.changingInstr(MI);
1946 
1947     if (TypeIdx == 0)
1948       widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
1949     else
1950       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1951 
1952     Observer.changedInstr(MI);
1953     return Legalized;
1954   case TargetOpcode::G_LOAD:
1955   case TargetOpcode::G_SEXTLOAD:
1956   case TargetOpcode::G_ZEXTLOAD:
1957     Observer.changingInstr(MI);
1958     widenScalarDst(MI, WideTy);
1959     Observer.changedInstr(MI);
1960     return Legalized;
1961 
1962   case TargetOpcode::G_STORE: {
1963     if (TypeIdx != 0)
1964       return UnableToLegalize;
1965 
1966     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1967     if (!isPowerOf2_32(Ty.getSizeInBits()))
1968       return UnableToLegalize;
1969 
1970     Observer.changingInstr(MI);
1971 
1972     unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
1973       TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
1974     widenScalarSrc(MI, WideTy, 0, ExtType);
1975 
1976     Observer.changedInstr(MI);
1977     return Legalized;
1978   }
1979   case TargetOpcode::G_CONSTANT: {
1980     MachineOperand &SrcMO = MI.getOperand(1);
1981     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
1982     unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
1983         MRI.getType(MI.getOperand(0).getReg()));
1984     assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
1985             ExtOpc == TargetOpcode::G_ANYEXT) &&
1986            "Illegal Extend");
1987     const APInt &SrcVal = SrcMO.getCImm()->getValue();
1988     const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
1989                            ? SrcVal.sext(WideTy.getSizeInBits())
1990                            : SrcVal.zext(WideTy.getSizeInBits());
1991     Observer.changingInstr(MI);
1992     SrcMO.setCImm(ConstantInt::get(Ctx, Val));
1993 
1994     widenScalarDst(MI, WideTy);
1995     Observer.changedInstr(MI);
1996     return Legalized;
1997   }
1998   case TargetOpcode::G_FCONSTANT: {
1999     MachineOperand &SrcMO = MI.getOperand(1);
2000     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
2001     APFloat Val = SrcMO.getFPImm()->getValueAPF();
2002     bool LosesInfo;
2003     switch (WideTy.getSizeInBits()) {
2004     case 32:
2005       Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
2006                   &LosesInfo);
2007       break;
2008     case 64:
2009       Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
2010                   &LosesInfo);
2011       break;
2012     default:
2013       return UnableToLegalize;
2014     }
2015 
2016     assert(!LosesInfo && "extend should always be lossless");
2017 
2018     Observer.changingInstr(MI);
2019     SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
2020 
2021     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2022     Observer.changedInstr(MI);
2023     return Legalized;
2024   }
2025   case TargetOpcode::G_IMPLICIT_DEF: {
2026     Observer.changingInstr(MI);
2027     widenScalarDst(MI, WideTy);
2028     Observer.changedInstr(MI);
2029     return Legalized;
2030   }
2031   case TargetOpcode::G_BRCOND:
2032     Observer.changingInstr(MI);
2033     widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
2034     Observer.changedInstr(MI);
2035     return Legalized;
2036 
2037   case TargetOpcode::G_FCMP:
2038     Observer.changingInstr(MI);
2039     if (TypeIdx == 0)
2040       widenScalarDst(MI, WideTy);
2041     else {
2042       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
2043       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
2044     }
2045     Observer.changedInstr(MI);
2046     return Legalized;
2047 
2048   case TargetOpcode::G_ICMP:
2049     Observer.changingInstr(MI);
2050     if (TypeIdx == 0)
2051       widenScalarDst(MI, WideTy);
2052     else {
2053       unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
2054                                MI.getOperand(1).getPredicate()))
2055                                ? TargetOpcode::G_SEXT
2056                                : TargetOpcode::G_ZEXT;
2057       widenScalarSrc(MI, WideTy, 2, ExtOpcode);
2058       widenScalarSrc(MI, WideTy, 3, ExtOpcode);
2059     }
2060     Observer.changedInstr(MI);
2061     return Legalized;
2062 
2063   case TargetOpcode::G_PTR_ADD:
2064     assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
2065     Observer.changingInstr(MI);
2066     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2067     Observer.changedInstr(MI);
2068     return Legalized;
2069 
2070   case TargetOpcode::G_PHI: {
2071     assert(TypeIdx == 0 && "Expecting only Idx 0");
2072 
2073     Observer.changingInstr(MI);
2074     for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
2075       MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2076       MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
2077       widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
2078     }
2079 
2080     MachineBasicBlock &MBB = *MI.getParent();
2081     MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
2082     widenScalarDst(MI, WideTy);
2083     Observer.changedInstr(MI);
2084     return Legalized;
2085   }
2086   case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
2087     if (TypeIdx == 0) {
2088       Register VecReg = MI.getOperand(1).getReg();
2089       LLT VecTy = MRI.getType(VecReg);
2090       Observer.changingInstr(MI);
2091 
2092       widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
2093                                      WideTy.getSizeInBits()),
2094                      1, TargetOpcode::G_SEXT);
2095 
2096       widenScalarDst(MI, WideTy, 0);
2097       Observer.changedInstr(MI);
2098       return Legalized;
2099     }
2100 
2101     if (TypeIdx != 2)
2102       return UnableToLegalize;
2103     Observer.changingInstr(MI);
2104     // TODO: Probably should be zext
2105     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2106     Observer.changedInstr(MI);
2107     return Legalized;
2108   }
2109   case TargetOpcode::G_INSERT_VECTOR_ELT: {
2110     if (TypeIdx == 1) {
2111       Observer.changingInstr(MI);
2112 
2113       Register VecReg = MI.getOperand(1).getReg();
2114       LLT VecTy = MRI.getType(VecReg);
2115       LLT WideVecTy = LLT::vector(VecTy.getNumElements(), WideTy);
2116 
2117       widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
2118       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2119       widenScalarDst(MI, WideVecTy, 0);
2120       Observer.changedInstr(MI);
2121       return Legalized;
2122     }
2123 
2124     if (TypeIdx == 2) {
2125       Observer.changingInstr(MI);
2126       // TODO: Probably should be zext
2127       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2128       Observer.changedInstr(MI);
2129       return Legalized;
2130     }
2131 
2132     return UnableToLegalize;
2133   }
2134   case TargetOpcode::G_FADD:
2135   case TargetOpcode::G_FMUL:
2136   case TargetOpcode::G_FSUB:
2137   case TargetOpcode::G_FMA:
2138   case TargetOpcode::G_FMAD:
2139   case TargetOpcode::G_FNEG:
2140   case TargetOpcode::G_FABS:
2141   case TargetOpcode::G_FCANONICALIZE:
2142   case TargetOpcode::G_FMINNUM:
2143   case TargetOpcode::G_FMAXNUM:
2144   case TargetOpcode::G_FMINNUM_IEEE:
2145   case TargetOpcode::G_FMAXNUM_IEEE:
2146   case TargetOpcode::G_FMINIMUM:
2147   case TargetOpcode::G_FMAXIMUM:
2148   case TargetOpcode::G_FDIV:
2149   case TargetOpcode::G_FREM:
2150   case TargetOpcode::G_FCEIL:
2151   case TargetOpcode::G_FFLOOR:
2152   case TargetOpcode::G_FCOS:
2153   case TargetOpcode::G_FSIN:
2154   case TargetOpcode::G_FLOG10:
2155   case TargetOpcode::G_FLOG:
2156   case TargetOpcode::G_FLOG2:
2157   case TargetOpcode::G_FRINT:
2158   case TargetOpcode::G_FNEARBYINT:
2159   case TargetOpcode::G_FSQRT:
2160   case TargetOpcode::G_FEXP:
2161   case TargetOpcode::G_FEXP2:
2162   case TargetOpcode::G_FPOW:
2163   case TargetOpcode::G_INTRINSIC_TRUNC:
2164   case TargetOpcode::G_INTRINSIC_ROUND:
2165     assert(TypeIdx == 0);
2166     Observer.changingInstr(MI);
2167 
2168     for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
2169       widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
2170 
2171     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2172     Observer.changedInstr(MI);
2173     return Legalized;
2174   case TargetOpcode::G_FPOWI: {
2175     if (TypeIdx != 0)
2176       return UnableToLegalize;
2177     Observer.changingInstr(MI);
2178     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2179     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2180     Observer.changedInstr(MI);
2181     return Legalized;
2182   }
2183   case TargetOpcode::G_INTTOPTR:
2184     if (TypeIdx != 1)
2185       return UnableToLegalize;
2186 
2187     Observer.changingInstr(MI);
2188     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2189     Observer.changedInstr(MI);
2190     return Legalized;
2191   case TargetOpcode::G_PTRTOINT:
2192     if (TypeIdx != 0)
2193       return UnableToLegalize;
2194 
2195     Observer.changingInstr(MI);
2196     widenScalarDst(MI, WideTy, 0);
2197     Observer.changedInstr(MI);
2198     return Legalized;
2199   case TargetOpcode::G_BUILD_VECTOR: {
2200     Observer.changingInstr(MI);
2201 
2202     const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
2203     for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
2204       widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
2205 
2206     // Avoid changing the result vector type if the source element type was
2207     // requested.
2208     if (TypeIdx == 1) {
2209       MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
2210     } else {
2211       widenScalarDst(MI, WideTy, 0);
2212     }
2213 
2214     Observer.changedInstr(MI);
2215     return Legalized;
2216   }
2217   case TargetOpcode::G_SEXT_INREG:
2218     if (TypeIdx != 0)
2219       return UnableToLegalize;
2220 
2221     Observer.changingInstr(MI);
2222     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2223     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
2224     Observer.changedInstr(MI);
2225     return Legalized;
2226   case TargetOpcode::G_PTRMASK: {
2227     if (TypeIdx != 1)
2228       return UnableToLegalize;
2229     Observer.changingInstr(MI);
2230     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2231     Observer.changedInstr(MI);
2232     return Legalized;
2233   }
2234   }
2235 }
2236 
2237 static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
2238                              MachineIRBuilder &B, Register Src, LLT Ty) {
2239   auto Unmerge = B.buildUnmerge(Ty, Src);
2240   for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
2241     Pieces.push_back(Unmerge.getReg(I));
2242 }
2243 
2244 LegalizerHelper::LegalizeResult
2245 LegalizerHelper::lowerBitcast(MachineInstr &MI) {
2246   Register Dst = MI.getOperand(0).getReg();
2247   Register Src = MI.getOperand(1).getReg();
2248   LLT DstTy = MRI.getType(Dst);
2249   LLT SrcTy = MRI.getType(Src);
2250 
2251   if (SrcTy.isVector()) {
2252     LLT SrcEltTy = SrcTy.getElementType();
2253     SmallVector<Register, 8> SrcRegs;
2254 
2255     if (DstTy.isVector()) {
2256       int NumDstElt = DstTy.getNumElements();
2257       int NumSrcElt = SrcTy.getNumElements();
2258 
2259       LLT DstEltTy = DstTy.getElementType();
2260       LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
2261       LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
2262 
2263       // If there's an element size mismatch, insert intermediate casts to match
2264       // the result element type.
2265       if (NumSrcElt < NumDstElt) { // Source element type is larger.
2266         // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
2267         //
2268         // =>
2269         //
        // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
        // %4:_(<2 x s8>) = G_BITCAST %2
        // %5:_(<2 x s8>) = G_BITCAST %3
        // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
2274         DstCastTy = LLT::vector(NumDstElt / NumSrcElt, DstEltTy);
2275         SrcPartTy = SrcEltTy;
2276       } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
2277         //
2278         // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
2279         //
2280         // =>
2281         //
        // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
        // %4:_(s16) = G_BITCAST %2
        // %5:_(s16) = G_BITCAST %3
        // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
2286         SrcPartTy = LLT::vector(NumSrcElt / NumDstElt, SrcEltTy);
2287         DstCastTy = DstEltTy;
2288       }
2289 
2290       getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
2291       for (Register &SrcReg : SrcRegs)
2292         SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
2293     } else
2294       getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
2295 
2296     MIRBuilder.buildMerge(Dst, SrcRegs);
2297     MI.eraseFromParent();
2298     return Legalized;
2299   }
2300 
2301   if (DstTy.isVector()) {
2302     SmallVector<Register, 8> SrcRegs;
2303     getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
2304     MIRBuilder.buildMerge(Dst, SrcRegs);
2305     MI.eraseFromParent();
2306     return Legalized;
2307   }
2308 
2309   return UnableToLegalize;
2310 }
2311 
2312 LegalizerHelper::LegalizeResult
2313 LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
2314   switch (MI.getOpcode()) {
2315   case TargetOpcode::G_LOAD: {
2316     if (TypeIdx != 0)
2317       return UnableToLegalize;
2318 
2319     Observer.changingInstr(MI);
2320     bitcastDst(MI, CastTy, 0);
2321     Observer.changedInstr(MI);
2322     return Legalized;
2323   }
2324   case TargetOpcode::G_STORE: {
2325     if (TypeIdx != 0)
2326       return UnableToLegalize;
2327 
2328     Observer.changingInstr(MI);
2329     bitcastSrc(MI, CastTy, 0);
2330     Observer.changedInstr(MI);
2331     return Legalized;
2332   }
2333   case TargetOpcode::G_SELECT: {
2334     if (TypeIdx != 0)
2335       return UnableToLegalize;
2336 
2337     if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
2338       LLVM_DEBUG(
2339           dbgs() << "bitcast action not implemented for vector select\n");
2340       return UnableToLegalize;
2341     }
2342 
2343     Observer.changingInstr(MI);
2344     bitcastSrc(MI, CastTy, 2);
2345     bitcastSrc(MI, CastTy, 3);
2346     bitcastDst(MI, CastTy, 0);
2347     Observer.changedInstr(MI);
2348     return Legalized;
2349   }
2350   case TargetOpcode::G_AND:
2351   case TargetOpcode::G_OR:
2352   case TargetOpcode::G_XOR: {
2353     Observer.changingInstr(MI);
2354     bitcastSrc(MI, CastTy, 1);
2355     bitcastSrc(MI, CastTy, 2);
2356     bitcastDst(MI, CastTy, 0);
2357     Observer.changedInstr(MI);
2358     return Legalized;
2359   }
2360   default:
2361     return UnableToLegalize;
2362   }
2363 }
2364 
2365 LegalizerHelper::LegalizeResult
2366 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
2367   using namespace TargetOpcode;
2368 
2369   switch(MI.getOpcode()) {
2370   default:
2371     return UnableToLegalize;
2372   case TargetOpcode::G_BITCAST:
2373     return lowerBitcast(MI);
2374   case TargetOpcode::G_SREM:
2375   case TargetOpcode::G_UREM: {
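    // Lower the remainder in terms of the corresponding division:
    //   x % y == x - (x / y) * y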
2376     auto Quot =
2377         MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
2378                               {MI.getOperand(1), MI.getOperand(2)});
2379 
2380     auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
2381     MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
2382     MI.eraseFromParent();
2383     return Legalized;
2384   }
2385   case TargetOpcode::G_SADDO:
2386   case TargetOpcode::G_SSUBO:
2387     return lowerSADDO_SSUBO(MI);
2388   case TargetOpcode::G_SMULO:
2389   case TargetOpcode::G_UMULO: {
2390     // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
2391     // result.
2392     Register Res = MI.getOperand(0).getReg();
2393     Register Overflow = MI.getOperand(1).getReg();
2394     Register LHS = MI.getOperand(2).getReg();
2395     Register RHS = MI.getOperand(3).getReg();
2396 
2397     unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
2398                           ? TargetOpcode::G_SMULH
2399                           : TargetOpcode::G_UMULH;
2400 
2401     Observer.changingInstr(MI);
2402     const auto &TII = MIRBuilder.getTII();
2403     MI.setDesc(TII.get(TargetOpcode::G_MUL));
2404     MI.RemoveOperand(1);
2405     Observer.changedInstr(MI);
2406 
2407     MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2408 
2409     auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
2410     auto Zero = MIRBuilder.buildConstant(Ty, 0);
2411 
2412     // For *signed* multiply, overflow is detected by checking:
2413     // (hi != (lo >> bitwidth-1))
2414     if (Opcode == TargetOpcode::G_SMULH) {
2415       auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
2416       auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
2417       MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
2418     } else {
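      // For unsigned multiply, overflow occurred iff the high half of the
      // full product is non-zero.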
2419       MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
2420     }
2421     return Legalized;
2422   }
2423   case TargetOpcode::G_FNEG: {
2424     // TODO: Handle vector types once we are able to
2425     // represent them.
2426     if (Ty.isVector())
2427       return UnableToLegalize;
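    // Negate by subtracting from negative zero: -x == -0.0 - x.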
2428     Register Res = MI.getOperand(0).getReg();
2429     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
2430     Type *ZeroTy = getFloatTypeForLLT(Ctx, Ty);
2431     if (!ZeroTy)
2432       return UnableToLegalize;
2433     ConstantFP &ZeroForNegation =
2434         *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
2435     auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
2436     Register SubByReg = MI.getOperand(1).getReg();
2437     Register ZeroReg = Zero.getReg(0);
2438     MIRBuilder.buildFSub(Res, ZeroReg, SubByReg, MI.getFlags());
2439     MI.eraseFromParent();
2440     return Legalized;
2441   }
2442   case TargetOpcode::G_FSUB: {
2443     // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
2444     // First, check if G_FNEG is marked as Lower. If so, we may
2445     // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
2446     if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
2447       return UnableToLegalize;
2448     Register Res = MI.getOperand(0).getReg();
2449     Register LHS = MI.getOperand(1).getReg();
2450     Register RHS = MI.getOperand(2).getReg();
2451     Register Neg = MRI.createGenericVirtualRegister(Ty);
2452     MIRBuilder.buildFNeg(Neg, RHS);
2453     MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
2454     MI.eraseFromParent();
2455     return Legalized;
2456   }
2457   case TargetOpcode::G_FMAD:
2458     return lowerFMad(MI);
2459   case TargetOpcode::G_FFLOOR:
2460     return lowerFFloor(MI);
2461   case TargetOpcode::G_INTRINSIC_ROUND:
2462     return lowerIntrinsicRound(MI);
2463   case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
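    // Lower to a plain G_ATOMIC_CMPXCHG and compute the success flag by
    // comparing the loaded result against the expected value.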
2464     Register OldValRes = MI.getOperand(0).getReg();
2465     Register SuccessRes = MI.getOperand(1).getReg();
2466     Register Addr = MI.getOperand(2).getReg();
2467     Register CmpVal = MI.getOperand(3).getReg();
2468     Register NewVal = MI.getOperand(4).getReg();
2469     MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
2470                                   **MI.memoperands_begin());
2471     MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
2472     MI.eraseFromParent();
2473     return Legalized;
2474   }
2475   case TargetOpcode::G_LOAD:
2476   case TargetOpcode::G_SEXTLOAD:
2477   case TargetOpcode::G_ZEXTLOAD: {
2478     // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
2479     Register DstReg = MI.getOperand(0).getReg();
2480     Register PtrReg = MI.getOperand(1).getReg();
2481     LLT DstTy = MRI.getType(DstReg);
2482     auto &MMO = **MI.memoperands_begin();
2483 
2484     if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
2485       if (MI.getOpcode() == TargetOpcode::G_LOAD) {
2486         // This load needs splitting into power of 2 sized loads.
2487         if (DstTy.isVector())
2488           return UnableToLegalize;
2489         if (isPowerOf2_32(DstTy.getSizeInBits()))
2490           return UnableToLegalize; // Don't know what we're being asked to do.
2491 
        // Our strategy here is to generate anyextending loads for the smaller
        // types up to the next power-of-2 result type, and then combine the
        // two larger result values together, before truncating back down to
        // the non-pow-2 type.
2496         // E.g. v1 = i24 load =>
2497         // v2 = i32 zextload (2 byte)
2498         // v3 = i32 load (1 byte)
2499         // v4 = i32 shl v3, 16
2500         // v5 = i32 or v4, v2
2501         // v1 = i24 trunc v5
2502         // By doing this we generate the correct truncate which should get
2503         // combined away as an artifact with a matching extend.
2504         uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
2505         uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;
2506 
2507         MachineFunction &MF = MIRBuilder.getMF();
2508         MachineMemOperand *LargeMMO =
2509             MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
2510         MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
2511             &MMO, LargeSplitSize / 8, SmallSplitSize / 8);
2512 
2513         LLT PtrTy = MRI.getType(PtrReg);
2514         unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
2515         LLT AnyExtTy = LLT::scalar(AnyExtSize);
2516         Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
2517         Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
2518         auto LargeLoad = MIRBuilder.buildLoadInstr(
2519             TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO);
2520 
2521         auto OffsetCst = MIRBuilder.buildConstant(
2522             LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
2523         Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
2524         auto SmallPtr =
2525             MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
2526         auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
2527                                               *SmallMMO);
2528 
2529         auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
2530         auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
2531         auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
2532         MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
2533         MI.eraseFromParent();
2534         return Legalized;
2535       }
2536       MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
2537       MI.eraseFromParent();
2538       return Legalized;
2539     }
2540 
2541     if (DstTy.isScalar()) {
2542       Register TmpReg =
2543           MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
2544       MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
2545       switch (MI.getOpcode()) {
2546       default:
2547         llvm_unreachable("Unexpected opcode");
2548       case TargetOpcode::G_LOAD:
2549         MIRBuilder.buildExtOrTrunc(TargetOpcode::G_ANYEXT, DstReg, TmpReg);
2550         break;
2551       case TargetOpcode::G_SEXTLOAD:
2552         MIRBuilder.buildSExt(DstReg, TmpReg);
2553         break;
2554       case TargetOpcode::G_ZEXTLOAD:
2555         MIRBuilder.buildZExt(DstReg, TmpReg);
2556         break;
2557       }
2558       MI.eraseFromParent();
2559       return Legalized;
2560     }
2561 
2562     return UnableToLegalize;
2563   }
2564   case TargetOpcode::G_STORE: {
2565     // Lower a non-power of 2 store into multiple pow-2 stores.
2566     // E.g. split an i24 store into an i16 store + i8 store.
2567     // We do this by first extending the stored value to the next largest power
2568     // of 2 type, and then using truncating stores to store the components.
    // By doing this, as with G_LOAD, we generate an extend that can be
    // artifact-combined away instead of leaving behind extracts.
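    // E.g. for an i24 store (illustrative):
    // v2 = i32 anyext v1
    // v3 = i32 lshr v2, 16
    // i16 store v2 (2 byte)
    // i8 store v3 at offset 2 (1 byte)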
2571     Register SrcReg = MI.getOperand(0).getReg();
2572     Register PtrReg = MI.getOperand(1).getReg();
2573     LLT SrcTy = MRI.getType(SrcReg);
2574     MachineMemOperand &MMO = **MI.memoperands_begin();
2575     if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
2576       return UnableToLegalize;
2577     if (SrcTy.isVector())
2578       return UnableToLegalize;
2579     if (isPowerOf2_32(SrcTy.getSizeInBits()))
2580       return UnableToLegalize; // Don't know what we're being asked to do.
2581 
2582     // Extend to the next pow-2.
2583     const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
2584     auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);
2585 
2586     // Obtain the smaller value by shifting away the larger value.
2587     uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
2588     uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
2589     auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
2590     auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
2591 
2592     // Generate the PtrAdd and truncating stores.
2593     LLT PtrTy = MRI.getType(PtrReg);
2594     auto OffsetCst = MIRBuilder.buildConstant(
2595             LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
2596     Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
2597     auto SmallPtr =
2598         MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
2599 
2600     MachineFunction &MF = MIRBuilder.getMF();
2601     MachineMemOperand *LargeMMO =
2602         MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
2603     MachineMemOperand *SmallMMO =
2604         MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
2605     MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
2606     MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
2607     MI.eraseFromParent();
2608     return Legalized;
2609   }
2610   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2611   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2612   case TargetOpcode::G_CTLZ:
2613   case TargetOpcode::G_CTTZ:
2614   case TargetOpcode::G_CTPOP:
2615     return lowerBitCount(MI, TypeIdx, Ty);
2616   case G_UADDO: {
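    // res = lhs + rhs; the carry out is set iff the result wrapped, i.e. iff
    // res is unsigned-less-than one of the operands (RHS is checked here).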
2617     Register Res = MI.getOperand(0).getReg();
2618     Register CarryOut = MI.getOperand(1).getReg();
2619     Register LHS = MI.getOperand(2).getReg();
2620     Register RHS = MI.getOperand(3).getReg();
2621 
2622     MIRBuilder.buildAdd(Res, LHS, RHS);
2623     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
2624 
2625     MI.eraseFromParent();
2626     return Legalized;
2627   }
2628   case G_UADDE: {
2629     Register Res = MI.getOperand(0).getReg();
2630     Register CarryOut = MI.getOperand(1).getReg();
2631     Register LHS = MI.getOperand(2).getReg();
2632     Register RHS = MI.getOperand(3).getReg();
2633     Register CarryIn = MI.getOperand(4).getReg();
2634     LLT Ty = MRI.getType(Res);
2635 
2636     auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
2637     auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
2638     MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
2639     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
2640 
2641     MI.eraseFromParent();
2642     return Legalized;
2643   }
2644   case G_USUBO: {
2645     Register Res = MI.getOperand(0).getReg();
2646     Register BorrowOut = MI.getOperand(1).getReg();
2647     Register LHS = MI.getOperand(2).getReg();
2648     Register RHS = MI.getOperand(3).getReg();
2649 
2650     MIRBuilder.buildSub(Res, LHS, RHS);
2651     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
2652 
2653     MI.eraseFromParent();
2654     return Legalized;
2655   }
2656   case G_USUBE: {
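    // res = lhs - rhs - borrow-in. The borrow out equals the borrow in when
    // lhs == rhs, and is (lhs unsigned-less-than rhs) otherwise.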
2657     Register Res = MI.getOperand(0).getReg();
2658     Register BorrowOut = MI.getOperand(1).getReg();
2659     Register LHS = MI.getOperand(2).getReg();
2660     Register RHS = MI.getOperand(3).getReg();
2661     Register BorrowIn = MI.getOperand(4).getReg();
2662     const LLT CondTy = MRI.getType(BorrowOut);
2663     const LLT Ty = MRI.getType(Res);
2664 
2665     auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
2666     auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
2667     MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
2668 
2669     auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS);
2670     auto LHS_ULT_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS);
2671     MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
2672 
2673     MI.eraseFromParent();
2674     return Legalized;
2675   }
2676   case G_UITOFP:
2677     return lowerUITOFP(MI, TypeIdx, Ty);
2678   case G_SITOFP:
2679     return lowerSITOFP(MI, TypeIdx, Ty);
2680   case G_FPTOUI:
2681     return lowerFPTOUI(MI, TypeIdx, Ty);
2682   case G_FPTOSI:
2683     return lowerFPTOSI(MI);
2684   case G_FPTRUNC:
2685     return lowerFPTRUNC(MI, TypeIdx, Ty);
2686   case G_FPOWI:
2687     return lowerFPOWI(MI);
2688   case G_SMIN:
2689   case G_SMAX:
2690   case G_UMIN:
2691   case G_UMAX:
2692     return lowerMinMax(MI, TypeIdx, Ty);
2693   case G_FCOPYSIGN:
2694     return lowerFCopySign(MI, TypeIdx, Ty);
2695   case G_FMINNUM:
2696   case G_FMAXNUM:
2697     return lowerFMinNumMaxNum(MI);
2698   case G_MERGE_VALUES:
2699     return lowerMergeValues(MI);
2700   case G_UNMERGE_VALUES:
2701     return lowerUnmergeValues(MI);
2702   case TargetOpcode::G_SEXT_INREG: {
2703     assert(MI.getOperand(2).isImm() && "Expected immediate");
2704     int64_t SizeInBits = MI.getOperand(2).getImm();
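    // Shift the value up so the sign bit of the narrow type lands in the MSB
    // of the wide type, then arithmetic-shift it back down:
    //   sext_inreg(x, N) == ashr(shl(x, W - N), W - N) for a W-bit type.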
2705 
2706     Register DstReg = MI.getOperand(0).getReg();
2707     Register SrcReg = MI.getOperand(1).getReg();
2708     LLT DstTy = MRI.getType(DstReg);
2709     Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
2710 
    auto MIBSz = MIRBuilder.buildConstant(
        DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
2712     MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
2713     MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
2714     MI.eraseFromParent();
2715     return Legalized;
2716   }
2717   case G_SHUFFLE_VECTOR:
2718     return lowerShuffleVector(MI);
2719   case G_DYN_STACKALLOC:
2720     return lowerDynStackAlloc(MI);
2721   case G_EXTRACT:
2722     return lowerExtract(MI);
2723   case G_INSERT:
2724     return lowerInsert(MI);
2725   case G_BSWAP:
2726     return lowerBswap(MI);
2727   case G_BITREVERSE:
2728     return lowerBitreverse(MI);
2729   case G_READ_REGISTER:
2730   case G_WRITE_REGISTER:
2731     return lowerReadWriteRegister(MI);
2732   case G_UADDSAT:
2733   case G_USUBSAT: {
2734     // Try to make a reasonable guess about which lowering strategy to use. The
2735     // target can override this with custom lowering and calling the
2736     // implementation functions.
2737     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2738     if (LI.isLegalOrCustom({G_UMIN, Ty}))
2739       return lowerAddSubSatToMinMax(MI);
2740     return lowerAddSubSatToAddoSubo(MI);
2741   }
2742   case G_SADDSAT:
2743   case G_SSUBSAT: {
2744     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2745 
2746     // FIXME: It would probably make more sense to see if G_SADDO is preferred,
2747     // since it's a shorter expansion. However, we would need to figure out the
2748     // preferred boolean type for the carry out for the query.
2749     if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
2750       return lowerAddSubSatToMinMax(MI);
2751     return lowerAddSubSatToAddoSubo(MI);
2752   }
2753   }
2754 }
2755 
2756 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
2757     MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
2758   SmallVector<Register, 2> DstRegs;
2759 
2760   unsigned NarrowSize = NarrowTy.getSizeInBits();
2761   Register DstReg = MI.getOperand(0).getReg();
2762   unsigned Size = MRI.getType(DstReg).getSizeInBits();
2763   int NumParts = Size / NarrowSize;
2764   // FIXME: Don't know how to handle the situation where the small vectors
2765   // aren't all the same size yet.
2766   if (Size % NarrowSize != 0)
2767     return UnableToLegalize;
2768 
2769   for (int i = 0; i < NumParts; ++i) {
2770     Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
2771     MIRBuilder.buildUndef(TmpReg);
2772     DstRegs.push_back(TmpReg);
2773   }
2774 
2775   if (NarrowTy.isVector())
2776     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2777   else
2778     MIRBuilder.buildBuildVector(DstReg, DstRegs);
2779 
2780   MI.eraseFromParent();
2781   return Legalized;
2782 }
2783 
2784 // Handle splitting vector operations which need to have the same number of
2785 // elements in each type index, but each type index may have a different element
2786 // type.
2787 //
2788 // e.g.  <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
2789 //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2790 //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2791 //
2792 // Also handles some irregular breakdown cases, e.g.
2793 // e.g.  <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
2794 //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2795 //             s64 = G_SHL s64, s32
2796 LegalizerHelper::LegalizeResult
2797 LegalizerHelper::fewerElementsVectorMultiEltType(
2798   MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
2799   if (TypeIdx != 0)
2800     return UnableToLegalize;
2801 
2802   const LLT NarrowTy0 = NarrowTyArg;
2803   const unsigned NewNumElts =
2804       NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;
2805 
2806   const Register DstReg = MI.getOperand(0).getReg();
2807   LLT DstTy = MRI.getType(DstReg);
2808   LLT LeftoverTy0;
2809 
2810   // All of the operands need to have the same number of elements, so if we can
2811   // determine a type breakdown for the result type, we can for all of the
2812   // source types.
2813   int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
2814   if (NumParts < 0)
2815     return UnableToLegalize;
2816 
2817   SmallVector<MachineInstrBuilder, 4> NewInsts;
2818 
2819   SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
2820   SmallVector<Register, 4> PartRegs, LeftoverRegs;
2821 
2822   for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
2823     Register SrcReg = MI.getOperand(I).getReg();
2824     LLT SrcTyI = MRI.getType(SrcReg);
2825     LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
2826     LLT LeftoverTyI;
2827 
2828     // Split this operand into the requested typed registers, and any leftover
2829     // required to reproduce the original type.
2830     if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
2831                       LeftoverRegs))
2832       return UnableToLegalize;
2833 
2834     if (I == 1) {
2835       // For the first operand, create an instruction for each part and setup
2836       // the result.
2837       for (Register PartReg : PartRegs) {
2838         Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2839         NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
2840                                .addDef(PartDstReg)
2841                                .addUse(PartReg));
2842         DstRegs.push_back(PartDstReg);
2843       }
2844 
2845       for (Register LeftoverReg : LeftoverRegs) {
2846         Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
2847         NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
2848                                .addDef(PartDstReg)
2849                                .addUse(LeftoverReg));
2850         LeftoverDstRegs.push_back(PartDstReg);
2851       }
2852     } else {
2853       assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());
2854 
2855       // Add the newly created operand splits to the existing instructions. The
2856       // odd-sized pieces are ordered after the requested NarrowTyArg sized
2857       // pieces.
2858       unsigned InstCount = 0;
2859       for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
2860         NewInsts[InstCount++].addUse(PartRegs[J]);
2861       for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
2862         NewInsts[InstCount++].addUse(LeftoverRegs[J]);
2863     }
2864 
2865     PartRegs.clear();
2866     LeftoverRegs.clear();
2867   }
2868 
2869   // Insert the newly built operations and rebuild the result register.
2870   for (auto &MIB : NewInsts)
2871     MIRBuilder.insertInstr(MIB);
2872 
2873   insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);
2874 
2875   MI.eraseFromParent();
2876   return Legalized;
2877 }
2878 
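// Handle splitting conversion-like operations (one result, one source, with
// possibly different element types) into pieces with fewer elements, e.g.
// with NarrowTy <2 x s32>:
//   <4 x s32> = G_FPTOSI <4 x s64> ->
//   <2 x s32> = G_FPTOSI <2 x s64>
//   <2 x s32> = G_FPTOSI <2 x s64>
//   <4 x s32> = G_CONCAT_VECTORS <2 x s32>, <2 x s32>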
2879 LegalizerHelper::LegalizeResult
2880 LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
2881                                           LLT NarrowTy) {
2882   if (TypeIdx != 0)
2883     return UnableToLegalize;
2884 
2885   Register DstReg = MI.getOperand(0).getReg();
2886   Register SrcReg = MI.getOperand(1).getReg();
2887   LLT DstTy = MRI.getType(DstReg);
2888   LLT SrcTy = MRI.getType(SrcReg);
2889 
2890   LLT NarrowTy0 = NarrowTy;
2891   LLT NarrowTy1;
2892   unsigned NumParts;
2893 
2894   if (NarrowTy.isVector()) {
2895     // Uneven breakdown not handled.
2896     NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
2897     if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
2898       return UnableToLegalize;
2899 
2900     NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits());
2901   } else {
2902     NumParts = DstTy.getNumElements();
2903     NarrowTy1 = SrcTy.getElementType();
2904   }
2905 
2906   SmallVector<Register, 4> SrcRegs, DstRegs;
2907   extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
2908 
2909   for (unsigned I = 0; I < NumParts; ++I) {
2910     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2911     MachineInstr *NewInst =
2912         MIRBuilder.buildInstr(MI.getOpcode(), {DstReg}, {SrcRegs[I]});
2913 
2914     NewInst->setFlags(MI.getFlags());
2915     DstRegs.push_back(DstReg);
2916   }
2917 
2918   if (NarrowTy.isVector())
2919     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2920   else
2921     MIRBuilder.buildBuildVector(DstReg, DstRegs);
2922 
2923   MI.eraseFromParent();
2924   return Legalized;
2925 }
2926 
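// Handle splitting G_ICMP and G_FCMP into pieces with fewer elements,
// splitting both compare operands alongside the result, e.g. with NarrowTy
// <2 x s1> on the result:
//   <4 x s1> = G_ICMP intpred(eq), <4 x s32>, <4 x s32> ->
//   <2 x s1> = G_ICMP intpred(eq), <2 x s32>, <2 x s32>
//   <2 x s1> = G_ICMP intpred(eq), <2 x s32>, <2 x s32>
//   <4 x s1> = G_CONCAT_VECTORS <2 x s1>, <2 x s1>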
2927 LegalizerHelper::LegalizeResult
2928 LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
2929                                         LLT NarrowTy) {
2930   Register DstReg = MI.getOperand(0).getReg();
2931   Register Src0Reg = MI.getOperand(2).getReg();
2932   LLT DstTy = MRI.getType(DstReg);
2933   LLT SrcTy = MRI.getType(Src0Reg);
2934 
2935   unsigned NumParts;
2936   LLT NarrowTy0, NarrowTy1;
2937 
2938   if (TypeIdx == 0) {
2939     unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
2940     unsigned OldElts = DstTy.getNumElements();
2941 
2942     NarrowTy0 = NarrowTy;
    NumParts =
        NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
2944     NarrowTy1 = NarrowTy.isVector() ?
2945       LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
2946       SrcTy.getElementType();
2947 
2948   } else {
2949     unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
2950     unsigned OldElts = SrcTy.getNumElements();
2951 
2952     NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
2953       NarrowTy.getNumElements();
2954     NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
2955                             DstTy.getScalarSizeInBits());
2956     NarrowTy1 = NarrowTy;
2957   }
2958 
2959   // FIXME: Don't know how to handle the situation where the small vectors
2960   // aren't all the same size yet.
2961   if (NarrowTy1.isVector() &&
2962       NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
2963     return UnableToLegalize;
2964 
2965   CmpInst::Predicate Pred
2966     = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
2967 
2968   SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
2969   extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
2970   extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);
2971 
2972   for (unsigned I = 0; I < NumParts; ++I) {
2973     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2974     DstRegs.push_back(DstReg);
2975 
2976     if (MI.getOpcode() == TargetOpcode::G_ICMP)
2977       MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
2978     else {
2979       MachineInstr *NewCmp
2980         = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
2981       NewCmp->setFlags(MI.getFlags());
2982     }
2983   }
2984 
2985   if (NarrowTy1.isVector())
2986     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2987   else
2988     MIRBuilder.buildBuildVector(DstReg, DstRegs);
2989 
2990   MI.eraseFromParent();
2991   return Legalized;
2992 }
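// Handle splitting G_SELECT into pieces with fewer elements. A scalar
// condition is reused for every piece; a vector condition is split along
// with the two value operands, e.g. with NarrowTy <2 x s32>:
//   <4 x s32> = G_SELECT s1, <4 x s32>, <4 x s32> ->
//   <2 x s32> = G_SELECT s1, <2 x s32>, <2 x s32>
//   <2 x s32> = G_SELECT s1, <2 x s32>, <2 x s32>
//   <4 x s32> = G_CONCAT_VECTORS <2 x s32>, <2 x s32>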
2993 
2994 LegalizerHelper::LegalizeResult
2995 LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
2996                                            LLT NarrowTy) {
2997   Register DstReg = MI.getOperand(0).getReg();
2998   Register CondReg = MI.getOperand(1).getReg();
2999 
3000   unsigned NumParts = 0;
3001   LLT NarrowTy0, NarrowTy1;
3002 
3003   LLT DstTy = MRI.getType(DstReg);
3004   LLT CondTy = MRI.getType(CondReg);
3005   unsigned Size = DstTy.getSizeInBits();
3006 
3007   assert(TypeIdx == 0 || CondTy.isVector());
3008 
3009   if (TypeIdx == 0) {
3010     NarrowTy0 = NarrowTy;
3011     NarrowTy1 = CondTy;
3012 
3013     unsigned NarrowSize = NarrowTy0.getSizeInBits();
3014     // FIXME: Don't know how to handle the situation where the small vectors
3015     // aren't all the same size yet.
3016     if (Size % NarrowSize != 0)
3017       return UnableToLegalize;
3018 
3019     NumParts = Size / NarrowSize;
3020 
3021     // Need to break down the condition type
3022     if (CondTy.isVector()) {
3023       if (CondTy.getNumElements() == NumParts)
3024         NarrowTy1 = CondTy.getElementType();
3025       else
3026         NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
3027                                 CondTy.getScalarSizeInBits());
3028     }
3029   } else {
3030     NumParts = CondTy.getNumElements();
3031     if (NarrowTy.isVector()) {
3032       // TODO: Handle uneven breakdown.
3033       if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
3034         return UnableToLegalize;
3035 
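      // FIXME: Breaking down a vector condition with a vector NarrowTy is
      // not implemented yet, so give up even when the breakdown is even.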
3036       return UnableToLegalize;
3037     } else {
3038       NarrowTy0 = DstTy.getElementType();
3039       NarrowTy1 = NarrowTy;
3040     }
3041   }
3042 
3043   SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
3044   if (CondTy.isVector())
3045     extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
3046 
3047   extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
3048   extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
3049 
3050   for (unsigned i = 0; i < NumParts; ++i) {
3051     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
3052     MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
3053                            Src1Regs[i], Src2Regs[i]);
3054     DstRegs.push_back(DstReg);
3055   }
3056 
3057   if (NarrowTy0.isVector())
3058     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
3059   else
3060     MIRBuilder.buildBuildVector(DstReg, DstRegs);
3061 
3062   MI.eraseFromParent();
3063   return Legalized;
3064 }
3065 
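// Handle splitting a G_PHI whose type needs fewer elements. New G_PHIs of
// NarrowTy (plus a possible leftover piece) replace the original, the pieces
// are recombined right after the PHI group, and each incoming value is split
// at the end of its predecessor block, before the terminator.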
3066 LegalizerHelper::LegalizeResult
3067 LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
3068                                         LLT NarrowTy) {
3069   const Register DstReg = MI.getOperand(0).getReg();
3070   LLT PhiTy = MRI.getType(DstReg);
3071   LLT LeftoverTy;
3072 
3073   // All of the operands need to have the same number of elements, so if we can
3074   // determine a type breakdown for the result type, we can for all of the
3075   // source types.
3076   int NumParts, NumLeftover;
3077   std::tie(NumParts, NumLeftover)
3078     = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
3079   if (NumParts < 0)
3080     return UnableToLegalize;
3081 
3082   SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
3083   SmallVector<MachineInstrBuilder, 4> NewInsts;
3084 
3085   const int TotalNumParts = NumParts + NumLeftover;
3086 
3087   // Insert the new phis in the result block first.
3088   for (int I = 0; I != TotalNumParts; ++I) {
3089     LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
3090     Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
3091     NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
3092                        .addDef(PartDstReg));
3093     if (I < NumParts)
3094       DstRegs.push_back(PartDstReg);
3095     else
3096       LeftoverDstRegs.push_back(PartDstReg);
3097   }
3098 
3099   MachineBasicBlock *MBB = MI.getParent();
3100   MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
3101   insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);
3102 
3103   SmallVector<Register, 4> PartRegs, LeftoverRegs;
3104 
3105   // Insert code to extract the incoming values in each predecessor block.
3106   for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
3107     PartRegs.clear();
3108     LeftoverRegs.clear();
3109 
3110     Register SrcReg = MI.getOperand(I).getReg();
3111     MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3112     MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
3113 
3114     LLT Unused;
3115     if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
3116                       LeftoverRegs))
3117       return UnableToLegalize;
3118 
    // Add the newly created operand splits to the existing instructions. The
    // odd-sized pieces are ordered after the requested NarrowTy sized pieces.
3122     for (int J = 0; J != TotalNumParts; ++J) {
3123       MachineInstrBuilder MIB = NewInsts[J];
3124       MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
3125       MIB.addMBB(&OpMBB);
3126     }
3127   }
3128 
3129   MI.eraseFromParent();
3130   return Legalized;
3131 }
3132 
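// Handle breaking up a G_UNMERGE_VALUES by first unmerging the source into
// GCD(SrcTy, NarrowTy) sized pieces, then unmerging each of those pieces into
// the final results.
//
// %1:_(s32), %2:_(s32), %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %0:_(<4 x s32>)
//   -> <2 x s32>
//
// %5:_(<2 x s32>), %6:_(<2 x s32>) = G_UNMERGE_VALUES %0
// %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %5
// %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %6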
3133 LegalizerHelper::LegalizeResult
3134 LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
3135                                                   unsigned TypeIdx,
3136                                                   LLT NarrowTy) {
3137   if (TypeIdx != 1)
3138     return UnableToLegalize;
3139 
3140   const int NumDst = MI.getNumOperands() - 1;
3141   const Register SrcReg = MI.getOperand(NumDst).getReg();
3142   LLT SrcTy = MRI.getType(SrcReg);
3143 
3144   LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3145 
3146   // TODO: Create sequence of extracts.
3147   if (DstTy == NarrowTy)
3148     return UnableToLegalize;
3149 
3150   LLT GCDTy = getGCDType(SrcTy, NarrowTy);
3151   if (DstTy == GCDTy) {
3152     // This would just be a copy of the same unmerge.
3153     // TODO: Create extracts, pad with undef and create intermediate merges.
3154     return UnableToLegalize;
3155   }
3156 
3157   auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
3158   const int NumUnmerge = Unmerge->getNumOperands() - 1;
3159   const int PartsPerUnmerge = NumDst / NumUnmerge;
3160 
3161   for (int I = 0; I != NumUnmerge; ++I) {
3162     auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
3163 
3164     for (int J = 0; J != PartsPerUnmerge; ++J)
3165       MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
3166     MIB.addUse(Unmerge.getReg(I));
3167   }
3168 
3169   MI.eraseFromParent();
3170   return Legalized;
3171 }
3172 
3173 LegalizerHelper::LegalizeResult
3174 LegalizerHelper::fewerElementsVectorBuildVector(MachineInstr &MI,
3175                                                 unsigned TypeIdx,
3176                                                 LLT NarrowTy) {
3177   assert(TypeIdx == 0 && "not a vector type index");
3178   Register DstReg = MI.getOperand(0).getReg();
3179   LLT DstTy = MRI.getType(DstReg);
3180   LLT SrcTy = DstTy.getElementType();
3181 
3182   int DstNumElts = DstTy.getNumElements();
3183   int NarrowNumElts = NarrowTy.getNumElements();
3184   int NumConcat = (DstNumElts + NarrowNumElts - 1) / NarrowNumElts;
3185   LLT WidenedDstTy = LLT::vector(NarrowNumElts * NumConcat, SrcTy);
3186 
3187   SmallVector<Register, 8> ConcatOps;
3188   SmallVector<Register, 8> SubBuildVector;
3189 
3190   Register UndefReg;
3191   if (WidenedDstTy != DstTy)
3192     UndefReg = MIRBuilder.buildUndef(SrcTy).getReg(0);
3193 
3194   // Create a G_CONCAT_VECTORS of NarrowTy pieces, padding with undef as
3195   // necessary.
3196   //
3197   // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
3198   //   -> <2 x s16>
3199   //
3200   // %4:_(s16) = G_IMPLICIT_DEF
3201   // %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
3202   // %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
3203   // %7:_(<4 x s16>) = G_CONCAT_VECTORS %5, %6
3204   // %3:_(<3 x s16>) = G_EXTRACT %7, 0
3205   for (int I = 0; I != NumConcat; ++I) {
3206     for (int J = 0; J != NarrowNumElts; ++J) {
3207       int SrcIdx = NarrowNumElts * I + J;
3208 
3209       if (SrcIdx < DstNumElts) {
3210         Register SrcReg = MI.getOperand(SrcIdx + 1).getReg();
3211         SubBuildVector.push_back(SrcReg);
3212       } else
3213         SubBuildVector.push_back(UndefReg);
3214     }
3215 
3216     auto BuildVec = MIRBuilder.buildBuildVector(NarrowTy, SubBuildVector);
3217     ConcatOps.push_back(BuildVec.getReg(0));
3218     SubBuildVector.clear();
3219   }
3220 
3221   if (DstTy == WidenedDstTy)
3222     MIRBuilder.buildConcatVectors(DstReg, ConcatOps);
3223   else {
3224     auto Concat = MIRBuilder.buildConcatVectors(WidenedDstTy, ConcatOps);
3225     MIRBuilder.buildExtract(DstReg, Concat, 0);
3226   }
3227 
3228   MI.eraseFromParent();
3229   return Legalized;
3230 }
3231 
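// Break a wide G_LOAD or G_STORE into a sequence of smaller memory accesses
// through offset pointers, e.g. an s96 G_LOAD with NarrowTy s32 becomes three
// s32 loads at byte offsets 0, 4 and 8 whose results are recombined with
// G_MERGE_VALUES. An uneven breakdown gets a final, smaller leftover access.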
3232 LegalizerHelper::LegalizeResult
3233 LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
3234                                       LLT NarrowTy) {
3235   // FIXME: Don't know how to handle secondary types yet.
3236   if (TypeIdx != 0)
3237     return UnableToLegalize;
3238 
3239   MachineMemOperand *MMO = *MI.memoperands_begin();
3240 
3241   // This implementation doesn't work for atomics. Give up instead of doing
3242   // something invalid.
3243   if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
3244       MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
3245     return UnableToLegalize;
3246 
3247   bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
3248   Register ValReg = MI.getOperand(0).getReg();
3249   Register AddrReg = MI.getOperand(1).getReg();
3250   LLT ValTy = MRI.getType(ValReg);
3251 
3252   // FIXME: Do we need a distinct NarrowMemory legalize action?
3253   if (ValTy.getSizeInBits() != 8 * MMO->getSize()) {
3254     LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
3255     return UnableToLegalize;
3256   }
3257 
3258   int NumParts = -1;
3259   int NumLeftover = -1;
3260   LLT LeftoverTy;
3261   SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
3262   if (IsLoad) {
    std::tie(NumParts, NumLeftover) =
        getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
3264   } else {
3265     if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
3266                      NarrowLeftoverRegs)) {
3267       NumParts = NarrowRegs.size();
3268       NumLeftover = NarrowLeftoverRegs.size();
3269     }
3270   }
3271 
3272   if (NumParts == -1)
3273     return UnableToLegalize;
3274 
3275   const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
3276 
3277   unsigned TotalSize = ValTy.getSizeInBits();
3278 
  // Split the load/store into PartTy sized pieces starting at Offset. If this
  // is a load, return the new registers in ValRegs. For a store, each element
  // of ValRegs should already have type PartTy. Returns the next offset that
  // needs to be handled.
3283   auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
3284                              unsigned Offset) -> unsigned {
3285     MachineFunction &MF = MIRBuilder.getMF();
3286     unsigned PartSize = PartTy.getSizeInBits();
3287     for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
3288          Offset += PartSize, ++Idx) {
3289       unsigned ByteSize = PartSize / 8;
3290       unsigned ByteOffset = Offset / 8;
3291       Register NewAddrReg;
3292 
3293       MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
3294 
3295       MachineMemOperand *NewMMO =
3296         MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);
3297 
3298       if (IsLoad) {
3299         Register Dst = MRI.createGenericVirtualRegister(PartTy);
3300         ValRegs.push_back(Dst);
3301         MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
3302       } else {
3303         MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
3304       }
3305     }
3306 
3307     return Offset;
3308   };
3309 
3310   unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);
3311 
3312   // Handle the rest of the register if this isn't an even type breakdown.
3313   if (LeftoverTy.isValid())
3314     splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);
3315 
3316   if (IsLoad) {
3317     insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
3318                 LeftoverTy, NarrowLeftoverRegs);
3319   }
3320 
3321   MI.eraseFromParent();
3322   return Legalized;
3323 }
3324 
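// Break an operation with a single result type into NarrowTy sized pieces.
// This covers both narrowing the scalar type (NarrowTy has a different
// scalar type than the result; vector sources are bitcast to scalars first)
// and reducing the number of vector elements. The sources are split and
// padded up to a multiple of NarrowTy, the operation is emitted on each
// piece that contributes real result bits, undef stands in for the remaining
// pieces, and everything is remerged back into the destination type.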
3325 LegalizerHelper::LegalizeResult
LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned TypeIdx,
3327                                       LLT NarrowTy) {
3328   assert(TypeIdx == 0 && "only one type index expected");
3329 
3330   const unsigned Opc = MI.getOpcode();
3331   const int NumOps = MI.getNumOperands() - 1;
3332   const Register DstReg = MI.getOperand(0).getReg();
3333   const unsigned Flags = MI.getFlags();
3334   const unsigned NarrowSize = NarrowTy.getSizeInBits();
3335   const LLT NarrowScalarTy = LLT::scalar(NarrowSize);
3336 
3337   assert(NumOps <= 3 && "expected instruction with 1 result and 1-3 sources");
3338 
  // First, check whether we are narrowing (changing the element type) or
  // reducing the number of vector elements.
3341   const LLT DstTy = MRI.getType(DstReg);
3342   const bool IsNarrow = NarrowTy.getScalarType() != DstTy.getScalarType();
3343 
3344   SmallVector<Register, 8> ExtractedRegs[3];
3345   SmallVector<Register, 8> Parts;
3346 
3347   unsigned NarrowElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
3348 
3349   // Break down all the sources into NarrowTy pieces we can operate on. This may
3350   // involve creating merges to a wider type, padded with undef.
3351   for (int I = 0; I != NumOps; ++I) {
3352     Register SrcReg = MI.getOperand(I + 1).getReg();
3353     LLT SrcTy = MRI.getType(SrcReg);
3354 
3355     // The type to narrow SrcReg to. For narrowing, this is a smaller scalar.
3356     // For fewerElements, this is a smaller vector with the same element type.
3357     LLT OpNarrowTy;
3358     if (IsNarrow) {
3359       OpNarrowTy = NarrowScalarTy;
3360 
3361       // In case of narrowing, we need to cast vectors to scalars for this to
3362       // work properly
3363       // FIXME: Can we do without the bitcast here if we're narrowing?
3364       if (SrcTy.isVector()) {
3365         SrcTy = LLT::scalar(SrcTy.getSizeInBits());
3366         SrcReg = MIRBuilder.buildBitcast(SrcTy, SrcReg).getReg(0);
3367       }
3368     } else {
3369       OpNarrowTy = LLT::scalarOrVector(NarrowElts, SrcTy.getScalarType());
3370     }
3371 
3372     LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, OpNarrowTy, SrcReg);
3373 
3374     // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand.
3375     buildLCMMergePieces(SrcTy, OpNarrowTy, GCDTy, ExtractedRegs[I],
3376                         TargetOpcode::G_ANYEXT);
3377   }
3378 
3379   SmallVector<Register, 8> ResultRegs;
3380 
3381   // Input operands for each sub-instruction.
3382   SmallVector<SrcOp, 4> InputRegs(NumOps, Register());
3383 
3384   int NumParts = ExtractedRegs[0].size();
3385   const unsigned DstSize = DstTy.getSizeInBits();
3386   const LLT DstScalarTy = LLT::scalar(DstSize);
3387 
3388   // Narrowing needs to use scalar types
3389   LLT DstLCMTy, NarrowDstTy;
3390   if (IsNarrow) {
3391     DstLCMTy = getLCMType(DstScalarTy, NarrowScalarTy);
3392     NarrowDstTy = NarrowScalarTy;
3393   } else {
3394     DstLCMTy = getLCMType(DstTy, NarrowTy);
3395     NarrowDstTy = NarrowTy;
3396   }
3397 
3398   // We widened the source registers to satisfy merge/unmerge size
3399   // constraints. We'll have some extra fully undef parts.
3400   const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize;
3401 
3402   for (int I = 0; I != NumRealParts; ++I) {
3403     // Emit this instruction on each of the split pieces.
3404     for (int J = 0; J != NumOps; ++J)
3405       InputRegs[J] = ExtractedRegs[J][I];
3406 
3407     auto Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy}, InputRegs, Flags);
3408     ResultRegs.push_back(Inst.getReg(0));
3409   }
3410 
3411   // Fill out the widened result with undef instead of creating instructions
3412   // with undef inputs.
3413   int NumUndefParts = NumParts - NumRealParts;
3414   if (NumUndefParts != 0)
3415     ResultRegs.append(NumUndefParts,
3416                       MIRBuilder.buildUndef(NarrowDstTy).getReg(0));
3417 
3418   // Extract the possibly padded result. Use a scratch register if we need to do
3419   // a final bitcast, otherwise use the original result register.
3420   Register MergeDstReg;
3421   if (IsNarrow && DstTy.isVector())
3422     MergeDstReg = MRI.createGenericVirtualRegister(DstScalarTy);
3423   else
3424     MergeDstReg = DstReg;
3425 
3426   buildWidenedRemergeToDst(MergeDstReg, DstLCMTy, ResultRegs);
3427 
3428   // Recast to vector if we narrowed a vector
3429   if (IsNarrow && DstTy.isVector())
3430     MIRBuilder.buildBitcast(DstReg, MergeDstReg);
3431 
3432   MI.eraseFromParent();
3433   return Legalized;
3434 }
3435 
3436 LegalizerHelper::LegalizeResult
3437 LegalizerHelper::fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx,
3438                                               LLT NarrowTy) {
3439   Register DstReg = MI.getOperand(0).getReg();
3440   Register SrcReg = MI.getOperand(1).getReg();
3441   int64_t Imm = MI.getOperand(2).getImm();
3442 
3443   LLT DstTy = MRI.getType(DstReg);
3444 
3445   SmallVector<Register, 8> Parts;
3446   LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
3447   LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts);
3448 
3449   for (Register &R : Parts)
3450     R = MIRBuilder.buildSExtInReg(NarrowTy, R, Imm).getReg(0);
3451 
3452   buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
3453 
3454   MI.eraseFromParent();
3455   return Legalized;
3456 }
3457 
3458 LegalizerHelper::LegalizeResult
3459 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
3460                                      LLT NarrowTy) {
3461   using namespace TargetOpcode;
3462 
3463   switch (MI.getOpcode()) {
3464   case G_IMPLICIT_DEF:
3465     return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
3466   case G_TRUNC:
3467   case G_AND:
3468   case G_OR:
3469   case G_XOR:
3470   case G_ADD:
3471   case G_SUB:
3472   case G_MUL:
3473   case G_PTR_ADD:
3474   case G_SMULH:
3475   case G_UMULH:
3476   case G_FADD:
3477   case G_FMUL:
3478   case G_FSUB:
3479   case G_FNEG:
3480   case G_FABS:
3481   case G_FCANONICALIZE:
3482   case G_FDIV:
3483   case G_FREM:
3484   case G_FMA:
3485   case G_FMAD:
3486   case G_FPOW:
3487   case G_FEXP:
3488   case G_FEXP2:
3489   case G_FLOG:
3490   case G_FLOG2:
3491   case G_FLOG10:
3492   case G_FNEARBYINT:
3493   case G_FCEIL:
3494   case G_FFLOOR:
3495   case G_FRINT:
3496   case G_INTRINSIC_ROUND:
3497   case G_INTRINSIC_TRUNC:
3498   case G_FCOS:
3499   case G_FSIN:
3500   case G_FSQRT:
3501   case G_BSWAP:
3502   case G_BITREVERSE:
3503   case G_SDIV:
3504   case G_UDIV:
3505   case G_SREM:
3506   case G_UREM:
3507   case G_SMIN:
3508   case G_SMAX:
3509   case G_UMIN:
3510   case G_UMAX:
3511   case G_FMINNUM:
3512   case G_FMAXNUM:
3513   case G_FMINNUM_IEEE:
3514   case G_FMAXNUM_IEEE:
3515   case G_FMINIMUM:
3516   case G_FMAXIMUM:
3517   case G_FSHL:
3518   case G_FSHR:
3519   case G_FREEZE:
3520   case G_SADDSAT:
3521   case G_SSUBSAT:
3522   case G_UADDSAT:
3523   case G_USUBSAT:
3524     return reduceOperationWidth(MI, TypeIdx, NarrowTy);
3525   case G_SHL:
3526   case G_LSHR:
3527   case G_ASHR:
3528   case G_CTLZ:
3529   case G_CTLZ_ZERO_UNDEF:
3530   case G_CTTZ:
3531   case G_CTTZ_ZERO_UNDEF:
3532   case G_CTPOP:
3533   case G_FCOPYSIGN:
3534     return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
3535   case G_ZEXT:
3536   case G_SEXT:
3537   case G_ANYEXT:
3538   case G_FPEXT:
3539   case G_FPTRUNC:
3540   case G_SITOFP:
3541   case G_UITOFP:
3542   case G_FPTOSI:
3543   case G_FPTOUI:
3544   case G_INTTOPTR:
3545   case G_PTRTOINT:
3546   case G_ADDRSPACE_CAST:
3547     return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
3548   case G_ICMP:
3549   case G_FCMP:
3550     return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
3551   case G_SELECT:
3552     return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
3553   case G_PHI:
3554     return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
3555   case G_UNMERGE_VALUES:
3556     return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
3557   case G_BUILD_VECTOR:
3558     return fewerElementsVectorBuildVector(MI, TypeIdx, NarrowTy);
3559   case G_LOAD:
3560   case G_STORE:
3561     return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
3562   case G_SEXT_INREG:
3563     return fewerElementsVectorSextInReg(MI, TypeIdx, NarrowTy);
3564   default:
3565     return UnableToLegalize;
3566   }
3567 }
3568 
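// Expand a wide shift by a known constant amount using two half-width
// registers. Depending on where the amount falls relative to the half width
// NVTBits, each result half is a constant, a sign-replicated half (G_ASHR),
// a possibly shifted input half, or an OR of two partial shifts, e.g. for
// G_SHL with 0 < Amt < NVTBits:
//   Lo = InL << Amt
//   Hi = (InH << Amt) | (InL >> (NVTBits - Amt))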
3569 LegalizerHelper::LegalizeResult
3570 LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
3571                                              const LLT HalfTy, const LLT AmtTy) {
3572 
3573   Register InL = MRI.createGenericVirtualRegister(HalfTy);
3574   Register InH = MRI.createGenericVirtualRegister(HalfTy);
3575   MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
3576 
3577   if (Amt.isNullValue()) {
3578     MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH});
3579     MI.eraseFromParent();
3580     return Legalized;
3581   }
3582 
3583   LLT NVT = HalfTy;
3584   unsigned NVTBits = HalfTy.getSizeInBits();
3585   unsigned VTBits = 2 * NVTBits;
3586 
3587   SrcOp Lo(Register(0)), Hi(Register(0));
3588   if (MI.getOpcode() == TargetOpcode::G_SHL) {
3589     if (Amt.ugt(VTBits)) {
3590       Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
3591     } else if (Amt.ugt(NVTBits)) {
3592       Lo = MIRBuilder.buildConstant(NVT, 0);
3593       Hi = MIRBuilder.buildShl(NVT, InL,
3594                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
3595     } else if (Amt == NVTBits) {
3596       Lo = MIRBuilder.buildConstant(NVT, 0);
3597       Hi = InL;
3598     } else {
3599       Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
3600       auto OrLHS =
3601           MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
3602       auto OrRHS = MIRBuilder.buildLShr(
3603           NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
3604       Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
3605     }
3606   } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
3607     if (Amt.ugt(VTBits)) {
3608       Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
3609     } else if (Amt.ugt(NVTBits)) {
3610       Lo = MIRBuilder.buildLShr(NVT, InH,
3611                                 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
3612       Hi = MIRBuilder.buildConstant(NVT, 0);
3613     } else if (Amt == NVTBits) {
3614       Lo = InH;
3615       Hi = MIRBuilder.buildConstant(NVT, 0);
3616     } else {
3617       auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
3618 
3619       auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
3620       auto OrRHS = MIRBuilder.buildShl(
3621           NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
3622 
3623       Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
3624       Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
3625     }
3626   } else {
3627     if (Amt.ugt(VTBits)) {
3628       Hi = Lo = MIRBuilder.buildAShr(
3629           NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
3630     } else if (Amt.ugt(NVTBits)) {
3631       Lo = MIRBuilder.buildAShr(NVT, InH,
3632                                 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
3633       Hi = MIRBuilder.buildAShr(NVT, InH,
3634                                 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
3635     } else if (Amt == NVTBits) {
3636       Lo = InH;
3637       Hi = MIRBuilder.buildAShr(NVT, InH,
3638                                 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
3639     } else {
3640       auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
3641 
3642       auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
3643       auto OrRHS = MIRBuilder.buildShl(
3644           NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
3645 
3646       Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
3647       Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
3648     }
3649   }
3650 
3651   MIRBuilder.buildMerge(MI.getOperand(0), {Lo, Hi});
3652   MI.eraseFromParent();
3653 
3654   return Legalized;
3655 }
3656 
3657 // TODO: Optimize if constant shift amount.
3658 LegalizerHelper::LegalizeResult
3659 LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
3660                                    LLT RequestedTy) {
3661   if (TypeIdx == 1) {
3662     Observer.changingInstr(MI);
3663     narrowScalarSrc(MI, RequestedTy, 2);
3664     Observer.changedInstr(MI);
3665     return Legalized;
3666   }
3667 
3668   Register DstReg = MI.getOperand(0).getReg();
3669   LLT DstTy = MRI.getType(DstReg);
3670   if (DstTy.isVector())
3671     return UnableToLegalize;
3672 
3673   Register Amt = MI.getOperand(2).getReg();
3674   LLT ShiftAmtTy = MRI.getType(Amt);
3675   const unsigned DstEltSize = DstTy.getScalarSizeInBits();
3676   if (DstEltSize % 2 != 0)
3677     return UnableToLegalize;
3678 
3679   // Ignore the input type. We can only go to exactly half the size of the
3680   // input. If that isn't small enough, the resulting pieces will be further
3681   // legalized.
3682   const unsigned NewBitSize = DstEltSize / 2;
3683   const LLT HalfTy = LLT::scalar(NewBitSize);
3684   const LLT CondTy = LLT::scalar(1);
3685 
3686   if (const MachineInstr *KShiftAmt =
3687           getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
3688     return narrowScalarShiftByConstant(
3689         MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
3690   }
3691 
3692   // TODO: Expand with known bits.
3693 
3694   // Handle the fully general expansion by an unknown amount.
3695   auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
3696 
3697   Register InL = MRI.createGenericVirtualRegister(HalfTy);
3698   Register InH = MRI.createGenericVirtualRegister(HalfTy);
3699   MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
3700 
3701   auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
3702   auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
3703 
3704   auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
3705   auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
3706   auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
3707 
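  // Each opcode below computes result halves for both the "short" case
  // (Amt < NewBitSize) and the "long" case (Amt >= NewBitSize) and selects
  // between them with IsShort. The extra IsZero select guards the half that
  // combines both inputs, since its short path shifts by (NewBitSize - Amt),
  // which would be out of range when Amt is zero.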
3708   Register ResultRegs[2];
3709   switch (MI.getOpcode()) {
3710   case TargetOpcode::G_SHL: {
3711     // Short: ShAmt < NewBitSize
3712     auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
3713 
3714     auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
3715     auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
3716     auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
3717 
3718     // Long: ShAmt >= NewBitSize
3719     auto LoL = MIRBuilder.buildConstant(HalfTy, 0);         // Lo part is zero.
3720     auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
3721 
3722     auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
3723     auto Hi = MIRBuilder.buildSelect(
3724         HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
3725 
3726     ResultRegs[0] = Lo.getReg(0);
3727     ResultRegs[1] = Hi.getReg(0);
3728     break;
3729   }
3730   case TargetOpcode::G_LSHR:
3731   case TargetOpcode::G_ASHR: {
3732     // Short: ShAmt < NewBitSize
3733     auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
3734 
3735     auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
3736     auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
3737     auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
3738 
3739     // Long: ShAmt >= NewBitSize
3740     MachineInstrBuilder HiL;
3741     if (MI.getOpcode() == TargetOpcode::G_LSHR) {
3742       HiL = MIRBuilder.buildConstant(HalfTy, 0);            // Hi part is zero.
3743     } else {
3744       auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
3745       HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt);    // Sign of Hi part.
3746     }
3747     auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
3748                                      {InH, AmtExcess});     // Lo from Hi part.
3749 
3750     auto Lo = MIRBuilder.buildSelect(
3751         HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
3752 
3753     auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
3754 
3755     ResultRegs[0] = Lo.getReg(0);
3756     ResultRegs[1] = Hi.getReg(0);
3757     break;
3758   }
3759   default:
3760     llvm_unreachable("not a shift");
3761   }
3762 
3763   MIRBuilder.buildMerge(DstReg, ResultRegs);
3764   MI.eraseFromParent();
3765   return Legalized;
3766 }
3767 
3768 LegalizerHelper::LegalizeResult
3769 LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
3770                                        LLT MoreTy) {
3771   assert(TypeIdx == 0 && "Expecting only Idx 0");
3772 
3773   Observer.changingInstr(MI);
3774   for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
3775     MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3776     MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
3777     moreElementsVectorSrc(MI, MoreTy, I);
3778   }
3779 
3780   MachineBasicBlock &MBB = *MI.getParent();
3781   MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
3782   moreElementsVectorDst(MI, MoreTy, 0);
3783   Observer.changedInstr(MI);
3784   return Legalized;
3785 }
3786 
3787 LegalizerHelper::LegalizeResult
3788 LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
3789                                     LLT MoreTy) {
3790   unsigned Opc = MI.getOpcode();
3791   switch (Opc) {
3792   case TargetOpcode::G_IMPLICIT_DEF:
3793   case TargetOpcode::G_LOAD: {
3794     if (TypeIdx != 0)
3795       return UnableToLegalize;
3796     Observer.changingInstr(MI);
3797     moreElementsVectorDst(MI, MoreTy, 0);
3798     Observer.changedInstr(MI);
3799     return Legalized;
3800   }
3801   case TargetOpcode::G_STORE:
3802     if (TypeIdx != 0)
3803       return UnableToLegalize;
3804     Observer.changingInstr(MI);
3805     moreElementsVectorSrc(MI, MoreTy, 0);
3806     Observer.changedInstr(MI);
3807     return Legalized;
3808   case TargetOpcode::G_AND:
3809   case TargetOpcode::G_OR:
3810   case TargetOpcode::G_XOR:
3811   case TargetOpcode::G_SMIN:
3812   case TargetOpcode::G_SMAX:
3813   case TargetOpcode::G_UMIN:
3814   case TargetOpcode::G_UMAX:
3815   case TargetOpcode::G_FMINNUM:
3816   case TargetOpcode::G_FMAXNUM:
3817   case TargetOpcode::G_FMINNUM_IEEE:
3818   case TargetOpcode::G_FMAXNUM_IEEE:
3819   case TargetOpcode::G_FMINIMUM:
3820   case TargetOpcode::G_FMAXIMUM: {
3821     Observer.changingInstr(MI);
3822     moreElementsVectorSrc(MI, MoreTy, 1);
3823     moreElementsVectorSrc(MI, MoreTy, 2);
3824     moreElementsVectorDst(MI, MoreTy, 0);
3825     Observer.changedInstr(MI);
3826     return Legalized;
3827   }
3828   case TargetOpcode::G_EXTRACT:
3829     if (TypeIdx != 1)
3830       return UnableToLegalize;
3831     Observer.changingInstr(MI);
3832     moreElementsVectorSrc(MI, MoreTy, 1);
3833     Observer.changedInstr(MI);
3834     return Legalized;
3835   case TargetOpcode::G_INSERT:
3836   case TargetOpcode::G_FREEZE:
3837     if (TypeIdx != 0)
3838       return UnableToLegalize;
3839     Observer.changingInstr(MI);
3840     moreElementsVectorSrc(MI, MoreTy, 1);
3841     moreElementsVectorDst(MI, MoreTy, 0);
3842     Observer.changedInstr(MI);
3843     return Legalized;
3844   case TargetOpcode::G_SELECT:
3845     if (TypeIdx != 0)
3846       return UnableToLegalize;
3847     if (MRI.getType(MI.getOperand(1).getReg()).isVector())
3848       return UnableToLegalize;
3849 
3850     Observer.changingInstr(MI);
3851     moreElementsVectorSrc(MI, MoreTy, 2);
3852     moreElementsVectorSrc(MI, MoreTy, 3);
3853     moreElementsVectorDst(MI, MoreTy, 0);
3854     Observer.changedInstr(MI);
3855     return Legalized;
3856   case TargetOpcode::G_UNMERGE_VALUES: {
3857     if (TypeIdx != 1)
3858       return UnableToLegalize;
3859 
3860     LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3861     int NumDst = MI.getNumOperands() - 1;
3862     moreElementsVectorSrc(MI, MoreTy, NumDst);
3863 
3864     auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
3865     for (int I = 0; I != NumDst; ++I)
3866       MIB.addDef(MI.getOperand(I).getReg());
3867 
3868     int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits();
3869     for (int I = NumDst; I != NewNumDst; ++I)
3870       MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
3871 
3872     MIB.addUse(MI.getOperand(NumDst).getReg());
3873     MI.eraseFromParent();
3874     return Legalized;
3875   }
3876   case TargetOpcode::G_PHI:
3877     return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
3878   default:
3879     return UnableToLegalize;
3880   }
3881 }
3882 
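// Multiply the pieces of two wide values that have been split into NarrowTy
// parts (Src1Regs and Src2Regs, least significant part first) and write the
// parts of the product into DstRegs. This is standard long multiplication:
// each result part sums the low halves of its column's products (G_MUL), the
// high halves carried in from the previous column (G_UMULH), and the carries
// accumulated so far; carries are tracked with G_UADDO except for the most
// significant part, where they are no longer needed.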
3883 void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
3884                                         ArrayRef<Register> Src1Regs,
3885                                         ArrayRef<Register> Src2Regs,
3886                                         LLT NarrowTy) {
3887   MachineIRBuilder &B = MIRBuilder;
3888   unsigned SrcParts = Src1Regs.size();
3889   unsigned DstParts = DstRegs.size();
3890 
3891   unsigned DstIdx = 0; // Low bits of the result.
3892   Register FactorSum =
3893       B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
3894   DstRegs[DstIdx] = FactorSum;
3895 
3896   unsigned CarrySumPrevDstIdx;
3897   SmallVector<Register, 4> Factors;
3898 
3899   for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
3900     // Collect low parts of muls for DstIdx.
3901     for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
3902          i <= std::min(DstIdx, SrcParts - 1); ++i) {
3903       MachineInstrBuilder Mul =
3904           B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
3905       Factors.push_back(Mul.getReg(0));
3906     }
3907     // Collect high parts of muls from previous DstIdx.
3908     for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
3909          i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
3910       MachineInstrBuilder Umulh =
3911           B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
3912       Factors.push_back(Umulh.getReg(0));
3913     }
3914     // Add CarrySum from additions calculated for previous DstIdx.
3915     if (DstIdx != 1) {
3916       Factors.push_back(CarrySumPrevDstIdx);
3917     }
3918 
3919     Register CarrySum;
3920     // Add all factors and accumulate all carries into CarrySum.
3921     if (DstIdx != DstParts - 1) {
3922       MachineInstrBuilder Uaddo =
3923           B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
3924       FactorSum = Uaddo.getReg(0);
3925       CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
3926       for (unsigned i = 2; i < Factors.size(); ++i) {
3927         MachineInstrBuilder Uaddo =
3928             B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
3929         FactorSum = Uaddo.getReg(0);
3930         MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
3931         CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
3932       }
3933     } else {
      // Since the value for the next index is not calculated, neither is
      // CarrySum.
3935       FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
3936       for (unsigned i = 2; i < Factors.size(); ++i)
3937         FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
3938     }
3939 
3940     CarrySumPrevDstIdx = CarrySum;
3941     DstRegs[DstIdx] = FactorSum;
3942     Factors.clear();
3943   }
3944 }
3945 
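// Narrow G_MUL and G_UMULH by splitting the operands into NarrowTy pieces,
// e.g. an s64 G_MUL with NarrowTy s32 unmerges each operand into two s32
// pieces, expands the product with multiplyRegisters, and remerges the two
// result pieces with G_MERGE_VALUES. For G_UMULH the full double-width
// product is computed and only the high half of its pieces is merged.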
3946 LegalizerHelper::LegalizeResult
3947 LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
3948   Register DstReg = MI.getOperand(0).getReg();
3949   Register Src1 = MI.getOperand(1).getReg();
3950   Register Src2 = MI.getOperand(2).getReg();
3951 
3952   LLT Ty = MRI.getType(DstReg);
3953   if (Ty.isVector())
3954     return UnableToLegalize;
3955 
3956   unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
3957   unsigned DstSize = Ty.getSizeInBits();
3958   unsigned NarrowSize = NarrowTy.getSizeInBits();
3959   if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
3960     return UnableToLegalize;
3961 
3962   unsigned NumDstParts = DstSize / NarrowSize;
3963   unsigned NumSrcParts = SrcSize / NarrowSize;
3964   bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
3965   unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);
3966 
3967   SmallVector<Register, 2> Src1Parts, Src2Parts;
3968   SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
3969   extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
3970   extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
3971   multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
3972 
3973   // Take only high half of registers if this is high mul.
3974   ArrayRef<Register> DstRegs(
3975       IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
3976   MIRBuilder.buildMerge(DstReg, DstRegs);
3977   MI.eraseFromParent();
3978   return Legalized;
3979 }
3980 
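// Break a G_EXTRACT whose source operand needs to be narrowed into extracts
// from the NarrowTy pieces of the source. Pieces that do not overlap the
// extracted range are skipped, pieces entirely inside it are passed through
// unchanged, and partially overlapping pieces get a smaller G_EXTRACT; the
// collected segments are then merged into the destination register.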
3981 LegalizerHelper::LegalizeResult
3982 LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
3983                                      LLT NarrowTy) {
3984   if (TypeIdx != 1)
3985     return UnableToLegalize;
3986 
3987   uint64_t NarrowSize = NarrowTy.getSizeInBits();
3988 
3989   int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
3990   // FIXME: add support for when SizeOp1 isn't an exact multiple of
3991   // NarrowSize.
3992   if (SizeOp1 % NarrowSize != 0)
3993     return UnableToLegalize;
3994   int NumParts = SizeOp1 / NarrowSize;
3995 
3996   SmallVector<Register, 2> SrcRegs, DstRegs;
3997   SmallVector<uint64_t, 2> Indexes;
3998   extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
3999 
4000   Register OpReg = MI.getOperand(0).getReg();
4001   uint64_t OpStart = MI.getOperand(2).getImm();
4002   uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
4003   for (int i = 0; i < NumParts; ++i) {
4004     unsigned SrcStart = i * NarrowSize;
4005 
4006     if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
4007       // No part of the extract uses this subregister, ignore it.
4008       continue;
4009     } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
4010       // The entire subregister is extracted, forward the value.
4011       DstRegs.push_back(SrcRegs[i]);
4012       continue;
4013     }
4014 
    // Compute the offset into this source piece where the extracted range
    // begins, and how many bits of this piece contribute to the result.
4017     int64_t ExtractOffset;
4018     uint64_t SegSize;
4019     if (OpStart < SrcStart) {
4020       ExtractOffset = 0;
4021       SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
4022     } else {
4023       ExtractOffset = OpStart - SrcStart;
4024       SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
4025     }
4026 
4027     Register SegReg = SrcRegs[i];
4028     if (ExtractOffset != 0 || SegSize != NarrowSize) {
4029       // A genuine extract is needed.
4030       SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
4031       MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
4032     }
4033 
4034     DstRegs.push_back(SegReg);
4035   }
4036 
4037   Register DstReg = MI.getOperand(0).getReg();
4038   if (MRI.getType(DstReg).isVector())
4039     MIRBuilder.buildBuildVector(DstReg, DstRegs);
4040   else if (DstRegs.size() > 1)
4041     MIRBuilder.buildMerge(DstReg, DstRegs);
4042   else
4043     MIRBuilder.buildCopy(DstReg, DstRegs[0]);
4044   MI.eraseFromParent();
4045   return Legalized;
4046 }
4047 
4048 LegalizerHelper::LegalizeResult
4049 LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
4050                                     LLT NarrowTy) {
4051   // FIXME: Don't know how to handle secondary types yet.
4052   if (TypeIdx != 0)
4053     return UnableToLegalize;
4054 
4055   uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4056   uint64_t NarrowSize = NarrowTy.getSizeInBits();
4057 
4058   // FIXME: add support for when SizeOp0 isn't an exact multiple of
4059   // NarrowSize.
4060   if (SizeOp0 % NarrowSize != 0)
4061     return UnableToLegalize;
4062 
4063   int NumParts = SizeOp0 / NarrowSize;
4064 
4065   SmallVector<Register, 2> SrcRegs, DstRegs;
4066   SmallVector<uint64_t, 2> Indexes;
4067   extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
4068 
4069   Register OpReg = MI.getOperand(2).getReg();
4070   uint64_t OpStart = MI.getOperand(3).getImm();
4071   uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
4072   for (int i = 0; i < NumParts; ++i) {
4073     unsigned DstStart = i * NarrowSize;
4074 
4075     if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
4076       // No part of the insert affects this subregister, forward the original.
4077       DstRegs.push_back(SrcRegs[i]);
4078       continue;
4079     } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
4080       // The entire subregister is defined by this insert, forward the new
4081       // value.
4082       DstRegs.push_back(OpReg);
4083       continue;
4084     }
4085 
    // Compute the overlap between the inserted value and this destination
    // piece: where to extract from OpReg, where to insert into the piece, and
    // how many bits are covered.
4088     int64_t ExtractOffset, InsertOffset;
4089     uint64_t SegSize;
4090     if (OpStart < DstStart) {
4091       InsertOffset = 0;
4092       ExtractOffset = DstStart - OpStart;
4093       SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
4094     } else {
4095       InsertOffset = OpStart - DstStart;
4096       ExtractOffset = 0;
4097       SegSize =
4098         std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
4099     }
4100 
4101     Register SegReg = OpReg;
4102     if (ExtractOffset != 0 || SegSize != OpSize) {
4103       // A genuine extract is needed.
4104       SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
4105       MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
4106     }
4107 
4108     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
4109     MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
4110     DstRegs.push_back(DstReg);
4111   }
4112 
4113   assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
4114   Register DstReg = MI.getOperand(0).getReg();
  if (MRI.getType(DstReg).isVector())
4116     MIRBuilder.buildBuildVector(DstReg, DstRegs);
4117   else
4118     MIRBuilder.buildMerge(DstReg, DstRegs);
4119   MI.eraseFromParent();
4120   return Legalized;
4121 }
4122 
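// Break a simple binary operation with a single type index into NarrowTy
// pieces plus a possibly smaller leftover piece, e.g. an s96 G_AND with
// NarrowTy s64 becomes an s64 G_AND on the low 64 bits and an s32 G_AND on
// the remaining bits, with the results recombined by insertParts.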
4123 LegalizerHelper::LegalizeResult
4124 LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
4125                                    LLT NarrowTy) {
4126   Register DstReg = MI.getOperand(0).getReg();
4127   LLT DstTy = MRI.getType(DstReg);
4128 
4129   assert(MI.getNumOperands() == 3 && TypeIdx == 0);
4130 
4131   SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
4132   SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
4133   SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
4134   LLT LeftoverTy;
4135   if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
4136                     Src0Regs, Src0LeftoverRegs))
4137     return UnableToLegalize;
4138 
4139   LLT Unused;
4140   if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
4141                     Src1Regs, Src1LeftoverRegs))
4142     llvm_unreachable("inconsistent extractParts result");
4143 
4144   for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
4145     auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
4146                                         {Src0Regs[I], Src1Regs[I]});
4147     DstRegs.push_back(Inst.getReg(0));
4148   }
4149 
4150   for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
4151     auto Inst = MIRBuilder.buildInstr(
4152       MI.getOpcode(),
4153       {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
4154     DstLeftoverRegs.push_back(Inst.getReg(0));
4155   }
4156 
4157   insertParts(DstReg, DstTy, NarrowTy, DstRegs,
4158               LeftoverTy, DstLeftoverRegs);
4159 
4160   MI.eraseFromParent();
4161   return Legalized;
4162 }
4163 
4164 LegalizerHelper::LegalizeResult
4165 LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
4166                                  LLT NarrowTy) {
4167   if (TypeIdx != 0)
4168     return UnableToLegalize;
4169 
4170   Register DstReg = MI.getOperand(0).getReg();
4171   Register SrcReg = MI.getOperand(1).getReg();
4172 
4173   LLT DstTy = MRI.getType(DstReg);
4174   if (DstTy.isVector())
4175     return UnableToLegalize;
4176 
4177   SmallVector<Register, 8> Parts;
4178   LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
  LLT LCMTy =
      buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
4180   buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
4181 
4182   MI.eraseFromParent();
4183   return Legalized;
4184 }
4185 
4186 LegalizerHelper::LegalizeResult
4187 LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
4188                                     LLT NarrowTy) {
4189   if (TypeIdx != 0)
4190     return UnableToLegalize;
4191 
4192   Register CondReg = MI.getOperand(1).getReg();
4193   LLT CondTy = MRI.getType(CondReg);
4194   if (CondTy.isVector()) // TODO: Handle vselect
4195     return UnableToLegalize;
4196 
4197   Register DstReg = MI.getOperand(0).getReg();
4198   LLT DstTy = MRI.getType(DstReg);
4199 
4200   SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
4201   SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
4202   SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
4203   LLT LeftoverTy;
4204   if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
4205                     Src1Regs, Src1LeftoverRegs))
4206     return UnableToLegalize;
4207 
4208   LLT Unused;
4209   if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
4210                     Src2Regs, Src2LeftoverRegs))
4211     llvm_unreachable("inconsistent extractParts result");
4212 
4213   for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
4214     auto Select = MIRBuilder.buildSelect(NarrowTy,
4215                                          CondReg, Src1Regs[I], Src2Regs[I]);
4216     DstRegs.push_back(Select.getReg(0));
4217   }
4218 
4219   for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
4220     auto Select = MIRBuilder.buildSelect(
4221       LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
4222     DstLeftoverRegs.push_back(Select.getReg(0));
4223   }
4224 
4225   insertParts(DstReg, DstTy, NarrowTy, DstRegs,
4226               LeftoverTy, DstLeftoverRegs);
4227 
4228   MI.eraseFromParent();
4229   return Legalized;
4230 }
4231 
4232 LegalizerHelper::LegalizeResult
4233 LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
4234                                   LLT NarrowTy) {
4235   if (TypeIdx != 1)
4236     return UnableToLegalize;
4237 
4238   Register DstReg = MI.getOperand(0).getReg();
4239   Register SrcReg = MI.getOperand(1).getReg();
4240   LLT DstTy = MRI.getType(DstReg);
4241   LLT SrcTy = MRI.getType(SrcReg);
4242   unsigned NarrowSize = NarrowTy.getSizeInBits();
4243 
4244   if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
4245     const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
4246 
4247     MachineIRBuilder &B = MIRBuilder;
4248     auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
4249     // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
4250     auto C_0 = B.buildConstant(NarrowTy, 0);
4251     auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
4252                                 UnmergeSrc.getReg(1), C_0);
4253     auto LoCTLZ = IsUndef ?
4254       B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
4255       B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
4256     auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
4257     auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
4258     auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
4259     B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
4260 
4261     MI.eraseFromParent();
4262     return Legalized;
4263   }
4264 
4265   return UnableToLegalize;
4266 }
4267 
4268 LegalizerHelper::LegalizeResult
4269 LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
4270                                   LLT NarrowTy) {
4271   if (TypeIdx != 1)
4272     return UnableToLegalize;
4273 
4274   Register DstReg = MI.getOperand(0).getReg();
4275   Register SrcReg = MI.getOperand(1).getReg();
4276   LLT DstTy = MRI.getType(DstReg);
4277   LLT SrcTy = MRI.getType(SrcReg);
4278   unsigned NarrowSize = NarrowTy.getSizeInBits();
4279 
4280   if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
4281     const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
4282 
4283     MachineIRBuilder &B = MIRBuilder;
4284     auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
4285     // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
4286     auto C_0 = B.buildConstant(NarrowTy, 0);
4287     auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
4288                                 UnmergeSrc.getReg(0), C_0);
4289     auto HiCTTZ = IsUndef ?
4290       B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
4291       B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
4292     auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
4293     auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
4294     auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
4295     B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
4296 
4297     MI.eraseFromParent();
4298     return Legalized;
4299   }
4300 
4301   return UnableToLegalize;
4302 }
4303 
4304 LegalizerHelper::LegalizeResult
4305 LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
4306                                    LLT NarrowTy) {
4307   if (TypeIdx != 1)
4308     return UnableToLegalize;
4309 
4310   Register DstReg = MI.getOperand(0).getReg();
4311   LLT DstTy = MRI.getType(DstReg);
4312   LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
4313   unsigned NarrowSize = NarrowTy.getSizeInBits();
4314 
4315   if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
4316     auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
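    // ctpop(Hi:Lo) -> ctpop(Hi) + ctpop(Lo)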
4317 
4318     auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
4319     auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
4320     MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
4321 
4322     MI.eraseFromParent();
4323     return Legalized;
4324   }
4325 
4326   return UnableToLegalize;
4327 }
4328 
4329 LegalizerHelper::LegalizeResult
4330 LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
4331   unsigned Opc = MI.getOpcode();
4332   const auto &TII = MIRBuilder.getTII();
4333   auto isSupported = [this](const LegalityQuery &Q) {
4334     auto QAction = LI.getAction(Q).Action;
4335     return QAction == Legal || QAction == Libcall || QAction == Custom;
4336   };
4337   switch (Opc) {
4338   default:
4339     return UnableToLegalize;
4340   case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
4341     // This trivially expands to CTLZ.
4342     Observer.changingInstr(MI);
4343     MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
4344     Observer.changedInstr(MI);
4345     return Legalized;
4346   }
4347   case TargetOpcode::G_CTLZ: {
4348     Register DstReg = MI.getOperand(0).getReg();
4349     Register SrcReg = MI.getOperand(1).getReg();
4350     LLT DstTy = MRI.getType(DstReg);
4351     LLT SrcTy = MRI.getType(SrcReg);
4352     unsigned Len = SrcTy.getSizeInBits();
4353 
4354     if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
4355       // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
4356       auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
4357       auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
4358       auto ICmp = MIRBuilder.buildICmp(
4359           CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
4360       auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
4361       MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
4362       MI.eraseFromParent();
4363       return Legalized;
4364     }
4365     // for now, we do this:
4366     // NewLen = NextPowerOf2(Len);
4367     // x = x | (x >> 1);
4368     // x = x | (x >> 2);
4369     // ...
    // x = x | (x >> 16);
    // x = x | (x >> 32); // for 64-bit input
    // ... up to a shift of NewLen/2
4373     // return Len - popcount(x);
4374     //
4375     // Ref: "Hacker's Delight" by Henry Warren
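    // After the shift-or cascade, every bit at or below the original leading
    // one is set, so popcount(x) == Len - ctlz(input) and the subtraction
    // below recovers ctlz (and yields Len for a zero input).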
4376     Register Op = SrcReg;
4377     unsigned NewLen = PowerOf2Ceil(Len);
4378     for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
4379       auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
4380       auto MIBOp = MIRBuilder.buildOr(
4381           SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
4382       Op = MIBOp.getReg(0);
4383     }
4384     auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
4385     MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
4386                         MIBPop);
4387     MI.eraseFromParent();
4388     return Legalized;
4389   }
4390   case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
4391     // This trivially expands to CTTZ.
4392     Observer.changingInstr(MI);
4393     MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
4394     Observer.changedInstr(MI);
4395     return Legalized;
4396   }
4397   case TargetOpcode::G_CTTZ: {
4398     Register DstReg = MI.getOperand(0).getReg();
4399     Register SrcReg = MI.getOperand(1).getReg();
4400     LLT DstTy = MRI.getType(DstReg);
4401     LLT SrcTy = MRI.getType(SrcReg);
4402 
4403     unsigned Len = SrcTy.getSizeInBits();
4404     if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
4405       // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
4406       // zero.
4407       auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
4408       auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
4409       auto ICmp = MIRBuilder.buildICmp(
4410           CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
4411       auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
4412       MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
4413       MI.eraseFromParent();
4414       return Legalized;
4415     }
4416     // for now, we use: { return popcount(~x & (x - 1)); }
4417     // unless the target has ctlz but not ctpop, in which case we use:
4418     // { return 32 - nlz(~x & (x-1)); }
4419     // Ref: "Hacker's Delight" by Henry Warren
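    // ~x & (x - 1) has ones in exactly the trailing zero positions of x (and
    // is all ones when x == 0), so counting its set bits yields cttz(x), with
    // a zero input mapping to Len.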
4420     auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
4421     auto MIBNot = MIRBuilder.buildXor(Ty, SrcReg, MIBCstNeg1);
4422     auto MIBTmp = MIRBuilder.buildAnd(
4423         Ty, MIBNot, MIRBuilder.buildAdd(Ty, SrcReg, MIBCstNeg1));
4424     if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) &&
4425         isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) {
4426       auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
4427       MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
4428                           MIRBuilder.buildCTLZ(Ty, MIBTmp));
4429       MI.eraseFromParent();
4430       return Legalized;
4431     }
4432     MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
4433     MI.getOperand(1).setReg(MIBTmp.getReg(0));
4434     return Legalized;
4435   }
4436   case TargetOpcode::G_CTPOP: {
4437     unsigned Size = Ty.getSizeInBits();
4438     MachineIRBuilder &B = MIRBuilder;
4439 
    // Count set bits in blocks of 2 bits. The default approach would be
    // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
    // We use the following formula instead:
    // B2Count = val - { (val >> 1) & 0x55555555 }
    // since it gives the same result in blocks of 2 with one instruction fewer.
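    // e.g. per 2-bit block: 0b11 - 0b1 = 0b10 (two bits set), 0b10 - 0b1 =
    // 0b01, 0b01 - 0b0 = 0b01, 0b00 - 0b0 = 0b00.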
4445     auto C_1 = B.buildConstant(Ty, 1);
4446     auto B2Set1LoTo1Hi = B.buildLShr(Ty, MI.getOperand(1).getReg(), C_1);
4447     APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
4448     auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
4449     auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
4450     auto B2Count = B.buildSub(Ty, MI.getOperand(1).getReg(), B2Count1Hi);
4451 
    // To get the count in blocks of 4, add the values from adjacent blocks of 2.
4453     // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
4454     auto C_2 = B.buildConstant(Ty, 2);
4455     auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
4456     APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
4457     auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
4458     auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
4459     auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
4460     auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
4461 
    // For the count in blocks of 8 bits we don't have to mask the high 4 bits
    // before the addition, since each per-byte sum sits in the range {0,...,8}
    // and 4 bits are enough to hold it. After the addition, clear the high 4
    // bits of each byte to get the 8-bit per-byte count.
4466     // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
4467     auto C_4 = B.buildConstant(Ty, 4);
4468     auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
4469     auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
4470     APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
4471     auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
4472     auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
4473 
    assert(Size <= 128 && "Scalar size is too large for CTPOP lower algorithm");
    // 8 bits can hold the CTPOP result of a 128-bit (or smaller) integer. The
    // multiply below sets the 8 MSBs of ResTmp to the sum of all 8-bit B8Counts.
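    // e.g. for 32 bits: B8Count * 0x01010101 places b3+b2+b1+b0 (the per-byte
    // counts) in the most significant byte, with no carries since the total is
    // at most 32.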
4477     auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
4478     auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
4479 
4480     // Shift count result from 8 high bits to low bits.
4481     auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
4482     B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
4483 
4484     MI.eraseFromParent();
4485     return Legalized;
4486   }
4487   }
4488 }
4489 
4490 // Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
4491 // representation.
4492 LegalizerHelper::LegalizeResult
4493 LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
4494   Register Dst = MI.getOperand(0).getReg();
4495   Register Src = MI.getOperand(1).getReg();
4496   const LLT S64 = LLT::scalar(64);
4497   const LLT S32 = LLT::scalar(32);
4498   const LLT S1 = LLT::scalar(1);
4499 
4500   assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
4501 
4502   // unsigned cul2f(ulong u) {
4503   //   uint lz = clz(u);
4504   //   uint e = (u != 0) ? 127U + 63U - lz : 0;
4505   //   u = (u << lz) & 0x7fffffffffffffffUL;
4506   //   ulong t = u & 0xffffffffffUL;
4507   //   uint v = (e << 23) | (uint)(u >> 40);
4508   //   uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
4509   //   return as_float(v + r);
4510   // }
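  // The rounding term r implements round-to-nearest-even on the 40 mantissa
  // bits that are discarded when forming v.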
4511 
4512   auto Zero32 = MIRBuilder.buildConstant(S32, 0);
4513   auto Zero64 = MIRBuilder.buildConstant(S64, 0);
4514 
4515   auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
4516 
4517   auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
4518   auto Sub = MIRBuilder.buildSub(S32, K, LZ);
4519 
4520   auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
4521   auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
4522 
4523   auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
4524   auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
4525 
4526   auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
4527 
4528   auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
4529   auto T = MIRBuilder.buildAnd(S64, U, Mask1);
4530 
4531   auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
4532   auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
4533   auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
4534 
4535   auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
4536   auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
4537   auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
4538   auto One = MIRBuilder.buildConstant(S32, 1);
4539 
4540   auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
4541   auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
4542   auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
4543   MIRBuilder.buildAdd(Dst, V, R);
4544 
4545   MI.eraseFromParent();
4546   return Legalized;
4547 }
4548 
4549 LegalizerHelper::LegalizeResult
4550 LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
4551   Register Dst = MI.getOperand(0).getReg();
4552   Register Src = MI.getOperand(1).getReg();
4553   LLT DstTy = MRI.getType(Dst);
4554   LLT SrcTy = MRI.getType(Src);
4555 
4556   if (SrcTy == LLT::scalar(1)) {
4557     auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
4558     auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
4559     MIRBuilder.buildSelect(Dst, Src, True, False);
4560     MI.eraseFromParent();
4561     return Legalized;
4562   }
4563 
4564   if (SrcTy != LLT::scalar(64))
4565     return UnableToLegalize;
4566 
4567   if (DstTy == LLT::scalar(32)) {
4568     // TODO: SelectionDAG has several alternative expansions to port which may
    // be more reasonable depending on the available instructions. If a target
4570     // has sitofp, does not have CTLZ, or can efficiently use f64 as an
4571     // intermediate type, this is probably worse.
4572     return lowerU64ToF32BitOps(MI);
4573   }
4574 
4575   return UnableToLegalize;
4576 }
4577 
4578 LegalizerHelper::LegalizeResult
4579 LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
4580   Register Dst = MI.getOperand(0).getReg();
4581   Register Src = MI.getOperand(1).getReg();
4582   LLT DstTy = MRI.getType(Dst);
4583   LLT SrcTy = MRI.getType(Src);
4584 
4585   const LLT S64 = LLT::scalar(64);
4586   const LLT S32 = LLT::scalar(32);
4587   const LLT S1 = LLT::scalar(1);
4588 
4589   if (SrcTy == S1) {
4590     auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
4591     auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
4592     MIRBuilder.buildSelect(Dst, Src, True, False);
4593     MI.eraseFromParent();
4594     return Legalized;
4595   }
4596 
4597   if (SrcTy != S64)
4598     return UnableToLegalize;
4599 
4600   if (DstTy == S32) {
4601     // signed cl2f(long l) {
4602     //   long s = l >> 63;
4603     //   float r = cul2f((l + s) ^ s);
4604     //   return s ? -r : r;
4605     // }
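    // (l + s) ^ s computes |l|: s is 0 for non-negative l, and for negative l
    // (s == -1) it evaluates to ~(l - 1) == -l.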
4606     Register L = Src;
4607     auto SignBit = MIRBuilder.buildConstant(S64, 63);
4608     auto S = MIRBuilder.buildAShr(S64, L, SignBit);
4609 
4610     auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
4611     auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
4612     auto R = MIRBuilder.buildUITOFP(S32, Xor);
4613 
4614     auto RNeg = MIRBuilder.buildFNeg(S32, R);
4615     auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
4616                                             MIRBuilder.buildConstant(S64, 0));
4617     MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
4618     MI.eraseFromParent();
4619     return Legalized;
4620   }
4621 
4622   return UnableToLegalize;
4623 }
4624 
4625 LegalizerHelper::LegalizeResult
4626 LegalizerHelper::lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
4627   Register Dst = MI.getOperand(0).getReg();
4628   Register Src = MI.getOperand(1).getReg();
4629   LLT DstTy = MRI.getType(Dst);
4630   LLT SrcTy = MRI.getType(Src);
4631   const LLT S64 = LLT::scalar(64);
4632   const LLT S32 = LLT::scalar(32);
4633 
4634   if (SrcTy != S64 && SrcTy != S32)
4635     return UnableToLegalize;
4636   if (DstTy != S32 && DstTy != S64)
4637     return UnableToLegalize;
4638 
  // FPTOSI gives the same result as FPTOUI for positive signed integers.
  // FPTOUI additionally needs to handle fp values that convert to unsigned
  // integers greater than or equal to 2^31 (i32 result) or 2^63 (i64 result).
  // Call this threshold 2^Exp for brevity.
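  // e.g. for an i32 result, an input of 3.0e9 is handled as
  // FPTOSI(3.0e9 - 2^31) with bit 31 then set back via the XOR below.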
4642 
4643   APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
4644   APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
4645                                                 : APFloat::IEEEdouble(),
4646                     APInt::getNullValue(SrcTy.getSizeInBits()));
4647   TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
4648 
4649   MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
4650 
4651   MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
  // For fp values greater than or equal to the Threshold (2^Exp), we use FPTOSI
  // on (Value - 2^Exp) and add 2^Exp back by setting the highest bit of the
  // result to 1.
4654   MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
4655   MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
4656   MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
4657   MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
4658 
4659   const LLT S1 = LLT::scalar(1);
4660 
4661   MachineInstrBuilder FCMP =
4662       MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
4663   MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
4664 
4665   MI.eraseFromParent();
4666   return Legalized;
4667 }
4668 
4669 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
4670   Register Dst = MI.getOperand(0).getReg();
4671   Register Src = MI.getOperand(1).getReg();
4672   LLT DstTy = MRI.getType(Dst);
4673   LLT SrcTy = MRI.getType(Src);
4674   const LLT S64 = LLT::scalar(64);
4675   const LLT S32 = LLT::scalar(32);
4676 
4677   // FIXME: Only f32 to i64 conversions are supported.
4678   if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
4679     return UnableToLegalize;
4680 
4681   // Expand f32 -> i64 conversion
4682   // This algorithm comes from compiler-rt's implementation of fixsfdi:
4683   // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c
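  // In outline: extract the biased exponent and the mantissa, restore the
  // implicit bit, shift the significand left by (Exponent - 23) when the
  // unbiased exponent exceeds 23 and right by (23 - Exponent) otherwise, apply
  // the sign via (R ^ Sign) - Sign, and return 0 when the unbiased exponent is
  // negative (i.e. |x| < 1).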
4684 
4685   unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
4686 
4687   auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
4688   auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
4689 
4690   auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
4691   auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
4692 
4693   auto SignMask = MIRBuilder.buildConstant(SrcTy,
4694                                            APInt::getSignMask(SrcEltBits));
4695   auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
4696   auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
4697   auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
4698   Sign = MIRBuilder.buildSExt(DstTy, Sign);
4699 
4700   auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
4701   auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
4702   auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
4703 
4704   auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
4705   R = MIRBuilder.buildZExt(DstTy, R);
4706 
4707   auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
4708   auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
4709   auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
4710   auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
4711 
4712   auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
4713   auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
4714 
4715   const LLT S1 = LLT::scalar(1);
4716   auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
4717                                     S1, Exponent, ExponentLoBit);
4718 
4719   R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
4720 
4721   auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
4722   auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
4723 
4724   auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
4725 
4726   auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
4727                                           S1, Exponent, ZeroSrcTy);
4728 
4729   auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
4730   MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
4731 
4732   MI.eraseFromParent();
4733   return Legalized;
4734 }
4735 
4736 // f64 -> f16 conversion using round-to-nearest-even rounding mode.
4737 LegalizerHelper::LegalizeResult
4738 LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
4739   Register Dst = MI.getOperand(0).getReg();
4740   Register Src = MI.getOperand(1).getReg();
4741 
4742   if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
4743     return UnableToLegalize;
4744 
4745   const unsigned ExpMask = 0x7ff;
4746   const unsigned ExpBiasf64 = 1023;
4747   const unsigned ExpBiasf16 = 15;
4748   const LLT S32 = LLT::scalar(32);
4749   const LLT S1 = LLT::scalar(1);
4750 
4751   auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
4752   Register U = Unmerge.getReg(0);
4753   Register UH = Unmerge.getReg(1);
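  // U is the low 32 bits of the f64 and UH the high 32 bits (sign, exponent
  // and the top of the mantissa).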
4754 
4755   auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
4756   E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
4757 
4758   // Subtract the fp64 exponent bias (1023) to get the real exponent and
4759   // add the f16 bias (15) to get the biased exponent for the f16 format.
4760   E = MIRBuilder.buildAdd(
4761     S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
4762 
4763   auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
4764   M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
4765 
4766   auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
4767                                        MIRBuilder.buildConstant(S32, 0x1ff));
4768   MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
4769 
4770   auto Zero = MIRBuilder.buildConstant(S32, 0);
4771   auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
4772   auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
4773   M = MIRBuilder.buildOr(S32, M, Lo40Set);
4774 
4775   // (M != 0 ? 0x0200 : 0) | 0x7c00;
4776   auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
4777   auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
4778   auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
4779 
4780   auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
4781   auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
4782 
4783   // N = M | (E << 12);
4784   auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
4785   auto N = MIRBuilder.buildOr(S32, M, EShl12);
4786 
4787   // B = clamp(1-E, 0, 13);
4788   auto One = MIRBuilder.buildConstant(S32, 1);
4789   auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
4790   auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
4791   B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
4792 
4793   auto SigSetHigh = MIRBuilder.buildOr(S32, M,
4794                                        MIRBuilder.buildConstant(S32, 0x1000));
4795 
4796   auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
4797   auto D0 = MIRBuilder.buildShl(S32, D, B);
4798 
4799   auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
4800                                              D0, SigSetHigh);
4801   auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
4802   D = MIRBuilder.buildOr(S32, D, D1);
4803 
4804   auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
4805   auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
4806 
4807   auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
4808   V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));
4809 
4810   auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
4811                                        MIRBuilder.buildConstant(S32, 3));
4812   auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
4813 
4814   auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
4815                                        MIRBuilder.buildConstant(S32, 5));
4816   auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
4817 
4818   V1 = MIRBuilder.buildOr(S32, V0, V1);
4819   V = MIRBuilder.buildAdd(S32, V, V1);
4820 
4821   auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,  S1,
4822                                        E, MIRBuilder.buildConstant(S32, 30));
4823   V = MIRBuilder.buildSelect(S32, CmpEGt30,
4824                              MIRBuilder.buildConstant(S32, 0x7c00), V);
4825 
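  // E == 1039 corresponds to an f64 exponent field of 0x7ff (2047 - 1023 + 15),
  // i.e. the input is Inf or NaN, so substitute the Inf/NaN pattern computed
  // in I above.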
4826   auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
4827                                          E, MIRBuilder.buildConstant(S32, 1039));
4828   V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
4829 
4830   // Extract the sign bit.
4831   auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
4832   Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
4833 
4834   // Insert the sign bit
4835   V = MIRBuilder.buildOr(S32, Sign, V);
4836 
4837   MIRBuilder.buildTrunc(Dst, V);
4838   MI.eraseFromParent();
4839   return Legalized;
4840 }
4841 
4842 LegalizerHelper::LegalizeResult
4843 LegalizerHelper::lowerFPTRUNC(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
4844   Register Dst = MI.getOperand(0).getReg();
4845   Register Src = MI.getOperand(1).getReg();
4846 
4847   LLT DstTy = MRI.getType(Dst);
4848   LLT SrcTy = MRI.getType(Src);
4849   const LLT S64 = LLT::scalar(64);
4850   const LLT S16 = LLT::scalar(16);
4851 
4852   if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
4853     return lowerFPTRUNC_F64_TO_F16(MI);
4854 
4855   return UnableToLegalize;
4856 }
4857 
// TODO: If RHS is a constant, SelectionDAGBuilder expands this into a
// multiplication tree.
4860 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
4861   Register Dst = MI.getOperand(0).getReg();
4862   Register Src0 = MI.getOperand(1).getReg();
4863   Register Src1 = MI.getOperand(2).getReg();
4864   LLT Ty = MRI.getType(Dst);
4865 
4866   auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
4867   MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
4868   MI.eraseFromParent();
4869   return Legalized;
4870 }
4871 
4872 static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
4873   switch (Opc) {
4874   case TargetOpcode::G_SMIN:
4875     return CmpInst::ICMP_SLT;
4876   case TargetOpcode::G_SMAX:
4877     return CmpInst::ICMP_SGT;
4878   case TargetOpcode::G_UMIN:
4879     return CmpInst::ICMP_ULT;
4880   case TargetOpcode::G_UMAX:
4881     return CmpInst::ICMP_UGT;
4882   default:
4883     llvm_unreachable("not in integer min/max");
4884   }
4885 }
4886 
4887 LegalizerHelper::LegalizeResult
4888 LegalizerHelper::lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
4889   Register Dst = MI.getOperand(0).getReg();
4890   Register Src0 = MI.getOperand(1).getReg();
4891   Register Src1 = MI.getOperand(2).getReg();
4892 
4893   const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
4894   LLT CmpType = MRI.getType(Dst).changeElementSize(1);
4895 
4896   auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
4897   MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
4898 
4899   MI.eraseFromParent();
4900   return Legalized;
4901 }
4902 
4903 LegalizerHelper::LegalizeResult
4904 LegalizerHelper::lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
4905   Register Dst = MI.getOperand(0).getReg();
4906   Register Src0 = MI.getOperand(1).getReg();
4907   Register Src1 = MI.getOperand(2).getReg();
4908 
4909   const LLT Src0Ty = MRI.getType(Src0);
4910   const LLT Src1Ty = MRI.getType(Src1);
4911 
4912   const int Src0Size = Src0Ty.getScalarSizeInBits();
4913   const int Src1Size = Src1Ty.getScalarSizeInBits();
4914 
4915   auto SignBitMask = MIRBuilder.buildConstant(
4916     Src0Ty, APInt::getSignMask(Src0Size));
4917 
4918   auto NotSignBitMask = MIRBuilder.buildConstant(
4919     Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
4920 
4921   auto And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask);
4922   MachineInstr *Or;
4923 
4924   if (Src0Ty == Src1Ty) {
4925     auto And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask);
4926     Or = MIRBuilder.buildOr(Dst, And0, And1);
4927   } else if (Src0Size > Src1Size) {
4928     auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
4929     auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
4930     auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
4931     auto And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask);
4932     Or = MIRBuilder.buildOr(Dst, And0, And1);
4933   } else {
4934     auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
4935     auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
4936     auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
4937     auto And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask);
4938     Or = MIRBuilder.buildOr(Dst, And0, And1);
4939   }
4940 
  // Be careful about setting nsz/nnan/ninf on every instruction, since the
  // constants are a NaN and -0.0, but the final result should preserve
  // everything.
4944   if (unsigned Flags = MI.getFlags())
4945     Or->setFlags(Flags);
4946 
4947   MI.eraseFromParent();
4948   return Legalized;
4949 }
4950 
4951 LegalizerHelper::LegalizeResult
4952 LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
4953   unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
4954     TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
4955 
4956   Register Dst = MI.getOperand(0).getReg();
4957   Register Src0 = MI.getOperand(1).getReg();
4958   Register Src1 = MI.getOperand(2).getReg();
4959   LLT Ty = MRI.getType(Dst);
4960 
4961   if (!MI.getFlag(MachineInstr::FmNoNans)) {
    // Insert canonicalizations if it's possible we need to quiet to get correct
4963     // sNaN behavior.
4964 
4965     // Note this must be done here, and not as an optimization combine in the
    // absence of a dedicated quiet-snan instruction, as we're using an
4967     // omni-purpose G_FCANONICALIZE.
4968     if (!isKnownNeverSNaN(Src0, MRI))
4969       Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
4970 
4971     if (!isKnownNeverSNaN(Src1, MRI))
4972       Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
4973   }
4974 
4975   // If there are no nans, it's safe to simply replace this with the non-IEEE
4976   // version.
4977   MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
4978   MI.eraseFromParent();
4979   return Legalized;
4980 }
4981 
4982 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
4983   // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
4984   Register DstReg = MI.getOperand(0).getReg();
4985   LLT Ty = MRI.getType(DstReg);
4986   unsigned Flags = MI.getFlags();
4987 
4988   auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
4989                                   Flags);
4990   MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
4991   MI.eraseFromParent();
4992   return Legalized;
4993 }
4994 
4995 LegalizerHelper::LegalizeResult
4996 LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
4997   Register DstReg = MI.getOperand(0).getReg();
4998   Register X = MI.getOperand(1).getReg();
4999   const unsigned Flags = MI.getFlags();
5000   const LLT Ty = MRI.getType(DstReg);
5001   const LLT CondTy = Ty.changeElementSize(1);
5002 
5003   // round(x) =>
5004   //  t = trunc(x);
5005   //  d = fabs(x - t);
5006   //  o = copysign(1.0f, x);
5007   //  return t + (d >= 0.5 ? o : 0.0);
5008 
5009   auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
5010 
5011   auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
5012   auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
5013   auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
5014   auto One = MIRBuilder.buildFConstant(Ty, 1.0);
5015   auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
5016   auto SignOne = MIRBuilder.buildFCopysign(Ty, One, X);
5017 
5018   auto Cmp = MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half,
5019                                   Flags);
5020   auto Sel = MIRBuilder.buildSelect(Ty, Cmp, SignOne, Zero, Flags);
5021 
5022   MIRBuilder.buildFAdd(DstReg, T, Sel, Flags);
5023 
5024   MI.eraseFromParent();
5025   return Legalized;
5026 }
5027 
5028 LegalizerHelper::LegalizeResult
5029 LegalizerHelper::lowerFFloor(MachineInstr &MI) {
5030   Register DstReg = MI.getOperand(0).getReg();
5031   Register SrcReg = MI.getOperand(1).getReg();
5032   unsigned Flags = MI.getFlags();
5033   LLT Ty = MRI.getType(DstReg);
5034   const LLT CondTy = Ty.changeElementSize(1);
5035 
5036   // result = trunc(src);
5037   // if (src < 0.0 && src != result)
5038   //   result += -1.0.
5039 
5040   auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
5041   auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
5042 
5043   auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
5044                                   SrcReg, Zero, Flags);
5045   auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
5046                                       SrcReg, Trunc, Flags);
5047   auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
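  // SITOFP of the i1 condition yields -1.0 when it is true and 0.0 otherwise,
  // so the add below applies the -1.0 correction only when needed.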
5048   auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
5049 
5050   MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
5051   MI.eraseFromParent();
5052   return Legalized;
5053 }
5054 
5055 LegalizerHelper::LegalizeResult
5056 LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
5057   const unsigned NumOps = MI.getNumOperands();
5058   Register DstReg = MI.getOperand(0).getReg();
5059   Register Src0Reg = MI.getOperand(1).getReg();
5060   LLT DstTy = MRI.getType(DstReg);
5061   LLT SrcTy = MRI.getType(Src0Reg);
5062   unsigned PartSize = SrcTy.getSizeInBits();
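  // Assemble the result as
  //   ZExt(Op1) | (ZExt(Op2) << PartSize) | (ZExt(Op3) << 2 * PartSize) | ...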
5063 
5064   LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
5065   Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
5066 
5067   for (unsigned I = 2; I != NumOps; ++I) {
5068     const unsigned Offset = (I - 1) * PartSize;
5069 
5070     Register SrcReg = MI.getOperand(I).getReg();
5071     auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
5072 
5073     Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
5074       MRI.createGenericVirtualRegister(WideTy);
5075 
5076     auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
5077     auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
5078     MIRBuilder.buildOr(NextResult, ResultReg, Shl);
5079     ResultReg = NextResult;
5080   }
5081 
5082   if (DstTy.isPointer()) {
5083     if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
5084           DstTy.getAddressSpace())) {
5085       LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
5086       return UnableToLegalize;
5087     }
5088 
5089     MIRBuilder.buildIntToPtr(DstReg, ResultReg);
5090   }
5091 
5092   MI.eraseFromParent();
5093   return Legalized;
5094 }
5095 
5096 LegalizerHelper::LegalizeResult
5097 LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
5098   const unsigned NumDst = MI.getNumOperands() - 1;
5099   Register SrcReg = MI.getOperand(NumDst).getReg();
5100   Register Dst0Reg = MI.getOperand(0).getReg();
5101   LLT DstTy = MRI.getType(Dst0Reg);
5102   if (DstTy.isPointer())
5103     return UnableToLegalize; // TODO
5104 
5105   SrcReg = coerceToScalar(SrcReg);
5106   if (!SrcReg)
5107     return UnableToLegalize;
5108 
5109   // Expand scalarizing unmerge as bitcast to integer and shift.
5110   LLT IntTy = MRI.getType(SrcReg);
5111 
5112   MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
5113 
5114   const unsigned DstSize = DstTy.getSizeInBits();
5115   unsigned Offset = DstSize;
5116   for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
5117     auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
5118     auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
5119     MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
5120   }
5121 
5122   MI.eraseFromParent();
5123   return Legalized;
5124 }
5125 
5126 LegalizerHelper::LegalizeResult
5127 LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
5128   Register DstReg = MI.getOperand(0).getReg();
5129   Register Src0Reg = MI.getOperand(1).getReg();
5130   Register Src1Reg = MI.getOperand(2).getReg();
5131   LLT Src0Ty = MRI.getType(Src0Reg);
5132   LLT DstTy = MRI.getType(DstReg);
5133   LLT IdxTy = LLT::scalar(32);
5134 
5135   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5136 
5137   if (DstTy.isScalar()) {
5138     if (Src0Ty.isVector())
5139       return UnableToLegalize;
5140 
5141     // This is just a SELECT.
5142     assert(Mask.size() == 1 && "Expected a single mask element");
5143     Register Val;
5144     if (Mask[0] < 0 || Mask[0] > 1)
5145       Val = MIRBuilder.buildUndef(DstTy).getReg(0);
5146     else
5147       Val = Mask[0] == 0 ? Src0Reg : Src1Reg;
5148     MIRBuilder.buildCopy(DstReg, Val);
5149     MI.eraseFromParent();
5150     return Legalized;
5151   }
5152 
5153   Register Undef;
5154   SmallVector<Register, 32> BuildVec;
5155   LLT EltTy = DstTy.getElementType();
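  // Lower the general case by extracting each selected element from the
  // appropriate source vector and rebuilding the destination with
  // G_BUILD_VECTOR; negative (undef) mask entries become an implicit def.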
5156 
5157   for (int Idx : Mask) {
5158     if (Idx < 0) {
5159       if (!Undef.isValid())
5160         Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
5161       BuildVec.push_back(Undef);
5162       continue;
5163     }
5164 
5165     if (Src0Ty.isScalar()) {
5166       BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
5167     } else {
5168       int NumElts = Src0Ty.getNumElements();
5169       Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
5170       int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
5171       auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
5172       auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
5173       BuildVec.push_back(Extract.getReg(0));
5174     }
5175   }
5176 
5177   MIRBuilder.buildBuildVector(DstReg, BuildVec);
5178   MI.eraseFromParent();
5179   return Legalized;
5180 }
5181 
5182 LegalizerHelper::LegalizeResult
5183 LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
5184   const auto &MF = *MI.getMF();
5185   const auto &TFI = *MF.getSubtarget().getFrameLowering();
5186   if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
5187     return UnableToLegalize;
5188 
5189   Register Dst = MI.getOperand(0).getReg();
5190   Register AllocSize = MI.getOperand(1).getReg();
5191   Align Alignment = assumeAligned(MI.getOperand(2).getImm());
5192 
5193   LLT PtrTy = MRI.getType(Dst);
5194   LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
5195 
5196   const auto &TLI = *MF.getSubtarget().getTargetLowering();
5197   Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
5198   auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
5199   SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
5200 
5201   // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
5202   // have to generate an extra instruction to negate the alloc and then use
5203   // G_PTR_ADD to add the negative offset.
5204   auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
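  // Round the new SP down to the requested alignment by masking off the low
  // bits with the negated alignment.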
5205   if (Alignment > Align(1)) {
5206     APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
5207     AlignMask.negate();
5208     auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
5209     Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
5210   }
5211 
5212   SPTmp = MIRBuilder.buildCast(PtrTy, Alloc);
5213   MIRBuilder.buildCopy(SPReg, SPTmp);
5214   MIRBuilder.buildCopy(Dst, SPTmp);
5215 
5216   MI.eraseFromParent();
5217   return Legalized;
5218 }
5219 
5220 LegalizerHelper::LegalizeResult
5221 LegalizerHelper::lowerExtract(MachineInstr &MI) {
5222   Register Dst = MI.getOperand(0).getReg();
5223   Register Src = MI.getOperand(1).getReg();
5224   unsigned Offset = MI.getOperand(2).getImm();
5225 
5226   LLT DstTy = MRI.getType(Dst);
5227   LLT SrcTy = MRI.getType(Src);
5228 
5229   if (DstTy.isScalar() &&
5230       (SrcTy.isScalar() ||
5231        (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
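    // Bitcast a vector source to a single wide integer, then shift the
    // requested bits down to bit 0 and truncate to the destination type.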
5232     LLT SrcIntTy = SrcTy;
5233     if (!SrcTy.isScalar()) {
5234       SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
5235       Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0);
5236     }
5237 
5238     if (Offset == 0)
5239       MIRBuilder.buildTrunc(Dst, Src);
5240     else {
5241       auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
5242       auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt);
5243       MIRBuilder.buildTrunc(Dst, Shr);
5244     }
5245 
5246     MI.eraseFromParent();
5247     return Legalized;
5248   }
5249 
5250   return UnableToLegalize;
5251 }
5252 
5253 LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
5254   Register Dst = MI.getOperand(0).getReg();
5255   Register Src = MI.getOperand(1).getReg();
5256   Register InsertSrc = MI.getOperand(2).getReg();
5257   uint64_t Offset = MI.getOperand(3).getImm();
5258 
5259   LLT DstTy = MRI.getType(Src);
5260   LLT InsertTy = MRI.getType(InsertSrc);
5261 
5262   if (InsertTy.isVector() ||
5263       (DstTy.isVector() && DstTy.getElementType() != InsertTy))
5264     return UnableToLegalize;
5265 
5266   const DataLayout &DL = MIRBuilder.getDataLayout();
5267   if ((DstTy.isPointer() &&
5268        DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
5269       (InsertTy.isPointer() &&
5270        DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
5271     LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
5272     return UnableToLegalize;
5273   }
5274 
5275   LLT IntDstTy = DstTy;
5276 
5277   if (!DstTy.isScalar()) {
5278     IntDstTy = LLT::scalar(DstTy.getSizeInBits());
5279     Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
5280   }
5281 
5282   if (!InsertTy.isScalar()) {
5283     const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
5284     InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
5285   }
5286 
5287   Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
5288   if (Offset != 0) {
5289     auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
5290     ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
5291   }
5292 
5293   APInt MaskVal = APInt::getBitsSetWithWrap(
5294       DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
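  // MaskVal has every bit set except those in [Offset, Offset + InsertTy size),
  // so the AND below clears only the bits being replaced before the new value
  // is ORed in.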
5295 
5296   auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
5297   auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
5298   auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
5299 
5300   MIRBuilder.buildCast(Dst, Or);
5301   MI.eraseFromParent();
5302   return Legalized;
5303 }
5304 
5305 LegalizerHelper::LegalizeResult
5306 LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
5307   Register Dst0 = MI.getOperand(0).getReg();
5308   Register Dst1 = MI.getOperand(1).getReg();
5309   Register LHS = MI.getOperand(2).getReg();
5310   Register RHS = MI.getOperand(3).getReg();
5311   const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
5312 
5313   LLT Ty = MRI.getType(Dst0);
5314   LLT BoolTy = MRI.getType(Dst1);
5315 
5316   if (IsAdd)
5317     MIRBuilder.buildAdd(Dst0, LHS, RHS);
5318   else
5319     MIRBuilder.buildSub(Dst0, LHS, RHS);
5320 
5321   // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
5322 
5323   auto Zero = MIRBuilder.buildConstant(Ty, 0);
5324 
5325   // For an addition, the result should be less than one of the operands (LHS)
5326   // if and only if the other operand (RHS) is negative, otherwise there will
5327   // be overflow.
5328   // For a subtraction, the result should be less than one of the operands
5329   // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
5330   // otherwise there will be overflow.
5331   auto ResultLowerThanLHS =
5332       MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
5333   auto ConditionRHS = MIRBuilder.buildICmp(
5334       IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
5335 
5336   MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
5337   MI.eraseFromParent();
5338   return Legalized;
5339 }
5340 
5341 LegalizerHelper::LegalizeResult
5342 LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
5343   Register Res = MI.getOperand(0).getReg();
5344   Register LHS = MI.getOperand(1).getReg();
5345   Register RHS = MI.getOperand(2).getReg();
5346   LLT Ty = MRI.getType(Res);
5347   bool IsSigned;
5348   bool IsAdd;
5349   unsigned BaseOp;
5350   switch (MI.getOpcode()) {
5351   default:
5352     llvm_unreachable("unexpected addsat/subsat opcode");
5353   case TargetOpcode::G_UADDSAT:
5354     IsSigned = false;
5355     IsAdd = true;
5356     BaseOp = TargetOpcode::G_ADD;
5357     break;
5358   case TargetOpcode::G_SADDSAT:
5359     IsSigned = true;
5360     IsAdd = true;
5361     BaseOp = TargetOpcode::G_ADD;
5362     break;
5363   case TargetOpcode::G_USUBSAT:
5364     IsSigned = false;
5365     IsAdd = false;
5366     BaseOp = TargetOpcode::G_SUB;
5367     break;
5368   case TargetOpcode::G_SSUBSAT:
5369     IsSigned = true;
5370     IsAdd = false;
5371     BaseOp = TargetOpcode::G_SUB;
5372     break;
5373   }
5374 
5375   if (IsSigned) {
5376     // sadd.sat(a, b) ->
5377     //   hi = 0x7fffffff - smax(a, 0)
5378     //   lo = 0x80000000 - smin(a, 0)
5379     //   a + smin(smax(lo, b), hi)
5380     // ssub.sat(a, b) ->
5381     //   lo = smax(a, -1) - 0x7fffffff
5382     //   hi = smin(a, -1) - 0x80000000
5383     //   a - smin(smax(lo, b), hi)
5384     // TODO: AMDGPU can use a "median of 3" instruction here:
5385     //   a +/- med3(lo, b, hi)
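    // e.g. sadd.sat on s8 with a = 100: hi = 127 - 100 = 27 and
    // lo = -128 - 0 = -128, so b is clamped to [-128, 27] and a + b can never
    // exceed 127.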
5386     uint64_t NumBits = Ty.getScalarSizeInBits();
5387     auto MaxVal =
5388         MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
5389     auto MinVal =
5390         MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
5391     MachineInstrBuilder Hi, Lo;
5392     if (IsAdd) {
5393       auto Zero = MIRBuilder.buildConstant(Ty, 0);
5394       Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
5395       Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
5396     } else {
5397       auto NegOne = MIRBuilder.buildConstant(Ty, -1);
5398       Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
5399                                MaxVal);
5400       Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
5401                                MinVal);
5402     }
5403     auto RHSClamped =
5404         MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
5405     MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
5406   } else {
5407     // uadd.sat(a, b) -> a + umin(~a, b)
5408     // usub.sat(a, b) -> a - umin(a, b)
5409     Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
5410     auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
5411     MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
5412   }
5413 
5414   MI.eraseFromParent();
5415   return Legalized;
5416 }
5417 
5418 LegalizerHelper::LegalizeResult
5419 LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
5420   Register Res = MI.getOperand(0).getReg();
5421   Register LHS = MI.getOperand(1).getReg();
5422   Register RHS = MI.getOperand(2).getReg();
5423   LLT Ty = MRI.getType(Res);
5424   LLT BoolTy = Ty.changeElementSize(1);
5425   bool IsSigned;
5426   bool IsAdd;
5427   unsigned OverflowOp;
5428   switch (MI.getOpcode()) {
5429   default:
5430     llvm_unreachable("unexpected addsat/subsat opcode");
5431   case TargetOpcode::G_UADDSAT:
5432     IsSigned = false;
5433     IsAdd = true;
5434     OverflowOp = TargetOpcode::G_UADDO;
5435     break;
5436   case TargetOpcode::G_SADDSAT:
5437     IsSigned = true;
5438     IsAdd = true;
5439     OverflowOp = TargetOpcode::G_SADDO;
5440     break;
5441   case TargetOpcode::G_USUBSAT:
5442     IsSigned = false;
5443     IsAdd = false;
5444     OverflowOp = TargetOpcode::G_USUBO;
5445     break;
5446   case TargetOpcode::G_SSUBSAT:
5447     IsSigned = true;
5448     IsAdd = false;
5449     OverflowOp = TargetOpcode::G_SSUBO;
5450     break;
5451   }
5452 
5453   auto OverflowRes =
5454       MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
5455   Register Tmp = OverflowRes.getReg(0);
5456   Register Ov = OverflowRes.getReg(1);
5457   MachineInstrBuilder Clamp;
5458   if (IsSigned) {
5459     // sadd.sat(a, b) ->
5460     //   {tmp, ov} = saddo(a, b)
5461     //   ov ? (tmp >>s 31) + 0x80000000 : r
5462     // ssub.sat(a, b) ->
5463     //   {tmp, ov} = ssubo(a, b)
5464     //   ov ? (tmp >>s 31) + 0x80000000 : r
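    // On overflow, (tmp >>s (BW - 1)) is all ones when tmp wrapped negative
    // (overflow in the positive direction) and 0 otherwise, so adding the
    // signed minimum selects INT_MAX or INT_MIN respectively.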
5465     uint64_t NumBits = Ty.getScalarSizeInBits();
5466     auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
5467     auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
5468     auto MinVal =
5469         MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
5470     Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
5471   } else {
5472     // uadd.sat(a, b) ->
5473     //   {tmp, ov} = uaddo(a, b)
5474     //   ov ? 0xffffffff : tmp
5475     // usub.sat(a, b) ->
5476     //   {tmp, ov} = usubo(a, b)
5477     //   ov ? 0 : tmp
5478     Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
5479   }
5480   MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
5481 
5482   MI.eraseFromParent();
5483   return Legalized;
5484 }
5485 
5486 LegalizerHelper::LegalizeResult
5487 LegalizerHelper::lowerBswap(MachineInstr &MI) {
5488   Register Dst = MI.getOperand(0).getReg();
5489   Register Src = MI.getOperand(1).getReg();
5490   const LLT Ty = MRI.getType(Src);
5491   unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
5492   unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
5493 
5494   // Swap most and least significant byte, set remaining bytes in Res to zero.
5495   auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
5496   auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
5497   auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
5498   auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
5499 
5500   // Set i-th high/low byte in Res to i-th low/high byte from Src.
5501   for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
5502     // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
    APInt APMask(SizeInBytes * 8, 0xFFULL << (i * 8));
5504     auto Mask = MIRBuilder.buildConstant(Ty, APMask);
5505     auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
5506     // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
5507     auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
5508     auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
5509     Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
5510     // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
5511     auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
5512     auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
5513     Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
5514   }
5515   Res.getInstr()->getOperand(0).setReg(Dst);
5516 
5517   MI.eraseFromParent();
5518   return Legalized;
5519 }
5520 
// { (Src & Mask) >> N } | { (Src << N) & Mask }
5522 static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
5523                                  MachineInstrBuilder Src, APInt Mask) {
5524   const LLT Ty = Dst.getLLTTy(*B.getMRI());
5525   MachineInstrBuilder C_N = B.buildConstant(Ty, N);
5526   MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
5527   auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
5528   auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
5529   return B.buildOr(Dst, LHS, RHS);
5530 }
5531 
5532 LegalizerHelper::LegalizeResult
5533 LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
5534   Register Dst = MI.getOperand(0).getReg();
5535   Register Src = MI.getOperand(1).getReg();
5536   const LLT Ty = MRI.getType(Src);
5537   unsigned Size = Ty.getSizeInBits();
5538 
5539   MachineInstrBuilder BSWAP =
5540       MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
5541 
5542   // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
5543   //    [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
5544   // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
5545   MachineInstrBuilder Swap4 =
5546       SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
5547 
5548   // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
  //    [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
  // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
5551   MachineInstrBuilder Swap2 =
5552       SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
5553 
5554   // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
  //    [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
  // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
5557   SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
5558 
5559   MI.eraseFromParent();
5560   return Legalized;
5561 }
5562 
5563 LegalizerHelper::LegalizeResult
5564 LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
5565   MachineFunction &MF = MIRBuilder.getMF();
5566   const TargetSubtargetInfo &STI = MF.getSubtarget();
5567   const TargetLowering *TLI = STI.getTargetLowering();
5568 
5569   bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
5570   int NameOpIdx = IsRead ? 1 : 0;
5571   int ValRegIndex = IsRead ? 0 : 1;
5572 
5573   Register ValReg = MI.getOperand(ValRegIndex).getReg();
5574   const LLT Ty = MRI.getType(ValReg);
5575   const MDString *RegStr = cast<MDString>(
5576     cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
5577 
5578   Register PhysReg = TLI->getRegisterByName(RegStr->getString().data(), Ty, MF);
5579   if (!PhysReg.isValid())
5580     return UnableToLegalize;
5581 
5582   if (IsRead)
5583     MIRBuilder.buildCopy(ValReg, PhysReg);
5584   else
5585     MIRBuilder.buildCopy(PhysReg, ValReg);
5586 
5587   MI.eraseFromParent();
5588   return Legalized;
5589 }
5590