1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file implements the LegalizerHelper class to legalize
10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
11 /// primary legalization.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
16 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
17 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
18 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
19 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
20 #include "llvm/CodeGen/MachineRegisterInfo.h"
21 #include "llvm/CodeGen/TargetFrameLowering.h"
22 #include "llvm/CodeGen/TargetInstrInfo.h"
23 #include "llvm/CodeGen/TargetLowering.h"
24 #include "llvm/CodeGen/TargetSubtargetInfo.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Support/raw_ostream.h"
28 
29 #define DEBUG_TYPE "legalizer"
30 
31 using namespace llvm;
32 using namespace LegalizeActions;
33 using namespace MIPatternMatch;
34 
35 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
36 ///
37 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
38 /// with any leftover piece as type \p LeftoverTy
39 ///
40 /// Returns -1 in the first element of the pair if the breakdown is not
41 /// satisfiable.
42 static std::pair<int, int>
43 getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
44   assert(!LeftoverTy.isValid() && "this is an out argument");
45 
46   unsigned Size = OrigTy.getSizeInBits();
47   unsigned NarrowSize = NarrowTy.getSizeInBits();
48   unsigned NumParts = Size / NarrowSize;
49   unsigned LeftoverSize = Size - NumParts * NarrowSize;
50   assert(Size > NarrowSize);
51 
52   if (LeftoverSize == 0)
53     return {NumParts, 0};
54 
55   if (NarrowTy.isVector()) {
56     unsigned EltSize = OrigTy.getScalarSizeInBits();
57     if (LeftoverSize % EltSize != 0)
58       return {-1, -1};
59     LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
60   } else {
61     LeftoverTy = LLT::scalar(LeftoverSize);
62   }
63 
64   int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
65   return std::make_pair(NumParts, NumLeftover);
66 }
67 
68 static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
69 
70   if (!Ty.isScalar())
71     return nullptr;
72 
73   switch (Ty.getSizeInBits()) {
74   case 16:
75     return Type::getHalfTy(Ctx);
76   case 32:
77     return Type::getFloatTy(Ctx);
78   case 64:
79     return Type::getDoubleTy(Ctx);
80   case 80:
81     return Type::getX86_FP80Ty(Ctx);
82   case 128:
83     return Type::getFP128Ty(Ctx);
84   default:
85     return nullptr;
86   }
87 }
88 
// Construct a LegalizerHelper that pulls both the LegalizerInfo and the
// TargetLowering from the function's subtarget.
LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &Builder)
    : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()),
      TLI(*MF.getSubtarget().getTargetLowering()) { }
95 
// Construct a LegalizerHelper with an explicitly-supplied LegalizerInfo,
// e.g. for use outside the normal legalizer pass.
LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &B)
  : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
    TLI(*MF.getSubtarget().getTargetLowering()) { }
101 
// Run one legalization step on \p MI: query the LegalizerInfo for the action
// to perform and dispatch to the matching transform. Returns AlreadyLegal,
// Legalized, or UnableToLegalize.
LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
  LLVM_DEBUG(dbgs() << "Legalizing: " << MI);

  MIRBuilder.setInstrAndDebugLoc(MI);

  // Intrinsics are legalized through a target hook rather than the action
  // table.
  if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
      MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
    return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case Legal:
    LLVM_DEBUG(dbgs() << ".. Already legal\n");
    return AlreadyLegal;
  case Libcall:
    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
    return libcall(MI);
  case NarrowScalar:
    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
    return narrowScalar(MI, Step.TypeIdx, Step.NewType);
  case WidenScalar:
    LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
    return widenScalar(MI, Step.TypeIdx, Step.NewType);
  case Bitcast:
    LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
    return bitcast(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    LLVM_DEBUG(dbgs() << ".. Lower\n");
    return lower(MI, Step.TypeIdx, Step.NewType);
  case FewerElements:
    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
  case MoreElements:
    LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
    return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
  case Custom:
    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
    return LI.legalizeCustom(*this, MI) ? Legalized : UnableToLegalize;
  default:
    LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
    return UnableToLegalize;
  }
}
145 
146 void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
147                                    SmallVectorImpl<Register> &VRegs) {
148   for (int i = 0; i < NumParts; ++i)
149     VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
150   MIRBuilder.buildUnmerge(VRegs, Reg);
151 }
152 
// Split \p Reg (of type \p RegTy) into as many \p MainTy pieces as fit
// (appended to \p VRegs), with any remaining bits extracted as pieces of
// \p LeftoverTy (an out argument) appended to \p LeftoverRegs. Returns false
// when the remainder cannot be expressed as a valid leftover type.
bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
                                   LLT MainTy, LLT &LeftoverTy,
                                   SmallVectorImpl<Register> &VRegs,
                                   SmallVectorImpl<Register> &LeftoverRegs) {
  assert(!LeftoverTy.isValid() && "this is an out argument");

  unsigned RegSize = RegTy.getSizeInBits();
  unsigned MainSize = MainTy.getSizeInBits();
  unsigned NumParts = RegSize / MainSize;
  unsigned LeftoverSize = RegSize - NumParts * MainSize;

  // Use an unmerge when possible.
  if (LeftoverSize == 0) {
    for (unsigned I = 0; I < NumParts; ++I)
      VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
    MIRBuilder.buildUnmerge(VRegs, Reg);
    return true;
  }

  if (MainTy.isVector()) {
    unsigned EltSize = MainTy.getScalarSizeInBits();
    // Leftover must be a whole number of vector elements.
    if (LeftoverSize % EltSize != 0)
      return false;
    LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
  } else {
    LeftoverTy = LLT::scalar(LeftoverSize);
  }

  // For irregular sizes, extract the individual parts.
  for (unsigned I = 0; I != NumParts; ++I) {
    Register NewReg = MRI.createGenericVirtualRegister(MainTy);
    VRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
  }

  // Extract the trailing bits, LeftoverSize at a time, starting just past the
  // main pieces.
  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
       Offset += LeftoverSize) {
    Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
    LeftoverRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, Offset);
  }

  return true;
}
197 
// Reassemble \p DstReg (of type \p ResultTy) from the \p PartTy pieces in
// \p PartRegs plus any \p LeftoverTy pieces in \p LeftoverRegs. This is the
// inverse of the leftover-aware extractParts.
void LegalizerHelper::insertParts(Register DstReg,
                                  LLT ResultTy, LLT PartTy,
                                  ArrayRef<Register> PartRegs,
                                  LLT LeftoverTy,
                                  ArrayRef<Register> LeftoverRegs) {
  // With no leftover, a single merge/concat/build_vector suffices.
  if (!LeftoverTy.isValid()) {
    assert(LeftoverRegs.empty());

    if (!ResultTy.isVector()) {
      MIRBuilder.buildMerge(DstReg, PartRegs);
      return;
    }

    if (PartTy.isVector())
      MIRBuilder.buildConcatVectors(DstReg, PartRegs);
    else
      MIRBuilder.buildBuildVector(DstReg, PartRegs);
    return;
  }

  unsigned PartSize = PartTy.getSizeInBits();
  unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();

  // Irregular breakdown: start from undef and insert each piece at its bit
  // offset, threading the partial result through CurResultReg.
  Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
  MIRBuilder.buildUndef(CurResultReg);

  unsigned Offset = 0;
  for (Register PartReg : PartRegs) {
    Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
    MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
    CurResultReg = NewResultReg;
    Offset += PartSize;
  }

  for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
    // Use the original output register for the final insert to avoid a copy.
    Register NewResultReg = (I + 1 == E) ?
      DstReg : MRI.createGenericVirtualRegister(ResultTy);

    MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
    CurResultReg = NewResultReg;
    Offset += LeftoverPartSize;
  }
}
242 
243 /// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
244 static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
245                               const MachineInstr &MI) {
246   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
247 
248   const int StartIdx = Regs.size();
249   const int NumResults = MI.getNumOperands() - 1;
250   Regs.resize(Regs.size() + NumResults);
251   for (int I = 0; I != NumResults; ++I)
252     Regs[StartIdx + I] = MI.getOperand(I).getReg();
253 }
254 
255 void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
256                                      LLT GCDTy, Register SrcReg) {
257   LLT SrcTy = MRI.getType(SrcReg);
258   if (SrcTy == GCDTy) {
259     // If the source already evenly divides the result type, we don't need to do
260     // anything.
261     Parts.push_back(SrcReg);
262   } else {
263     // Need to split into common type sized pieces.
264     auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
265     getUnmergeResults(Parts, *Unmerge);
266   }
267 }
268 
269 LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
270                                     LLT NarrowTy, Register SrcReg) {
271   LLT SrcTy = MRI.getType(SrcReg);
272   LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
273   extractGCDType(Parts, GCDTy, SrcReg);
274   return GCDTy;
275 }
276 
// Merge the \p GCDTy pieces in \p VRegs into \p NarrowTy values covering the
// LCM of \p DstTy and \p NarrowTy. If the sources don't cover the LCM type,
// pad according to \p PadStrategy (G_ZEXT, G_SEXT or G_ANYEXT). On return,
// \p VRegs holds the NarrowTy-typed pieces and the LCM type is returned.
LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
                                         SmallVectorImpl<Register> &VRegs,
                                         unsigned PadStrategy) {
  LLT LCMTy = getLCMType(DstTy, NarrowTy);

  int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
  int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
  int NumOrigSrc = VRegs.size();

  Register PadReg;

  // Get a value we can use to pad the source value if the sources won't evenly
  // cover the result type.
  if (NumOrigSrc < NumParts * NumSubParts) {
    if (PadStrategy == TargetOpcode::G_ZEXT)
      PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
    else if (PadStrategy == TargetOpcode::G_ANYEXT)
      PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    else {
      assert(PadStrategy == TargetOpcode::G_SEXT);

      // Shift the sign bit of the low register through the high register.
      auto ShiftAmt =
        MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
      PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
    }
  }

  // Registers for the final merge to be produced.
  SmallVector<Register, 4> Remerge(NumParts);

  // Registers needed for intermediate merges, which will be merged into a
  // source for Remerge.
  SmallVector<Register, 4> SubMerge(NumSubParts);

  // Once we've fully read off the end of the original source bits, we can reuse
  // the same high bits for remaining padding elements.
  Register AllPadReg;

  // Build merges to the LCM type to cover the original result type.
  for (int I = 0; I != NumParts; ++I) {
    bool AllMergePartsArePadding = true;

    // Build the requested merges to the requested type.
    for (int J = 0; J != NumSubParts; ++J) {
      int Idx = I * NumSubParts + J;
      if (Idx >= NumOrigSrc) {
        // Past the end of the original sources: use the pad value.
        SubMerge[J] = PadReg;
        continue;
      }

      SubMerge[J] = VRegs[Idx];

      // There are meaningful bits here we can't reuse later.
      AllMergePartsArePadding = false;
    }

    // If we've filled up a complete piece with padding bits, we can directly
    // emit the natural sized constant if applicable, rather than a merge of
    // smaller constants.
    if (AllMergePartsArePadding && !AllPadReg) {
      if (PadStrategy == TargetOpcode::G_ANYEXT)
        AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
      else if (PadStrategy == TargetOpcode::G_ZEXT)
        AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);

      // If this is a sign extension, we can't materialize a trivial constant
      // with the right type and have to produce a merge.
    }

    if (AllPadReg) {
      // Avoid creating additional instructions if we're just adding additional
      // copies of padding bits.
      Remerge[I] = AllPadReg;
      continue;
    }

    if (NumSubParts == 1)
      Remerge[I] = SubMerge[0];
    else
      Remerge[I] = MIRBuilder.buildMerge(NarrowTy, SubMerge).getReg(0);

    // In the sign extend padding case, re-use the first all-signbit merge.
    if (AllMergePartsArePadding && !AllPadReg)
      AllPadReg = Remerge[I];
  }

  VRegs = std::move(Remerge);
  return LCMTy;
}
367 
// Merge \p RemergeRegs (which together cover \p LCMTy) and write the relevant
// low bits into \p DstReg, truncating or unmerging when the destination type
// is smaller than the LCM type.
void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
                                               ArrayRef<Register> RemergeRegs) {
  LLT DstTy = MRI.getType(DstReg);

  // Create the merge to the widened source, and extract the relevant bits into
  // the result.

  if (DstTy == LCMTy) {
    // The pieces cover the destination exactly.
    MIRBuilder.buildMerge(DstReg, RemergeRegs);
    return;
  }

  auto Remerge = MIRBuilder.buildMerge(LCMTy, RemergeRegs);
  if (DstTy.isScalar() && LCMTy.isScalar()) {
    // Scalar case: keep only the low DstTy bits.
    MIRBuilder.buildTrunc(DstReg, Remerge);
    return;
  }

  if (LCMTy.isVector()) {
    // Vector case: unmerge into DstTy pieces; only the first one is kept
    // (as DstReg), the rest are discarded.
    unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
    SmallVector<Register, 8> UnmergeDefs(NumDefs);
    UnmergeDefs[0] = DstReg;
    for (unsigned I = 1; I != NumDefs; ++I)
      UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);

    MIRBuilder.buildUnmerge(UnmergeDefs,
                            MIRBuilder.buildMerge(LCMTy, RemergeRegs));
    return;
  }

  llvm_unreachable("unhandled case");
}
400 
/// Return the RTLIB libcall matching generic opcode \p Opcode at scalar bit
/// width \p Size. Integer opcodes support 32/64/128 bits; FP opcodes
/// additionally support 80 bits. Unsupported sizes or opcodes are fatal.
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
// Expand to a size switch over the integer-width variants of a libcall.
#define RTLIBCASE_INT(LibcallPrefix)                                           \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

// Same, but also covering the 80-bit (x87 extended) FP variant.
#define RTLIBCASE(LibcallPrefix)                                               \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 80:                                                                   \
      return RTLIB::LibcallPrefix##80;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

  switch (Opcode) {
  case TargetOpcode::G_SDIV:
    RTLIBCASE_INT(SDIV_I);
  case TargetOpcode::G_UDIV:
    RTLIBCASE_INT(UDIV_I);
  case TargetOpcode::G_SREM:
    RTLIBCASE_INT(SREM_I);
  case TargetOpcode::G_UREM:
    RTLIBCASE_INT(UREM_I);
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    RTLIBCASE_INT(CTLZ_I);
  case TargetOpcode::G_FADD:
    RTLIBCASE(ADD_F);
  case TargetOpcode::G_FSUB:
    RTLIBCASE(SUB_F);
  case TargetOpcode::G_FMUL:
    RTLIBCASE(MUL_F);
  case TargetOpcode::G_FDIV:
    RTLIBCASE(DIV_F);
  case TargetOpcode::G_FEXP:
    RTLIBCASE(EXP_F);
  case TargetOpcode::G_FEXP2:
    RTLIBCASE(EXP2_F);
  case TargetOpcode::G_FREM:
    RTLIBCASE(REM_F);
  case TargetOpcode::G_FPOW:
    RTLIBCASE(POW_F);
  case TargetOpcode::G_FMA:
    RTLIBCASE(FMA_F);
  case TargetOpcode::G_FSIN:
    RTLIBCASE(SIN_F);
  case TargetOpcode::G_FCOS:
    RTLIBCASE(COS_F);
  case TargetOpcode::G_FLOG10:
    RTLIBCASE(LOG10_F);
  case TargetOpcode::G_FLOG:
    RTLIBCASE(LOG_F);
  case TargetOpcode::G_FLOG2:
    RTLIBCASE(LOG2_F);
  case TargetOpcode::G_FCEIL:
    RTLIBCASE(CEIL_F);
  case TargetOpcode::G_FFLOOR:
    RTLIBCASE(FLOOR_F);
  case TargetOpcode::G_FMINNUM:
    RTLIBCASE(FMIN_F);
  case TargetOpcode::G_FMAXNUM:
    RTLIBCASE(FMAX_F);
  case TargetOpcode::G_FSQRT:
    RTLIBCASE(SQRT_F);
  case TargetOpcode::G_FRINT:
    RTLIBCASE(RINT_F);
  case TargetOpcode::G_FNEARBYINT:
    RTLIBCASE(NEARBYINT_F);
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
    RTLIBCASE(ROUNDEVEN_F);
  }
  llvm_unreachable("Unknown libcall function");
}
490 
/// True if an instruction is in tail position in its caller. Intended for
/// legalizing libcalls as tail calls when possible.
static bool isLibCallInTailPosition(const TargetInstrInfo &TII,
                                    MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const Function &F = MBB.getParent()->getFunction();

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore NoAlias and NonNull because they don't affect the
  // call sequence.
  AttributeList CallerAttrs = F.getAttributes();
  if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
          .removeAttribute(Attribute::NoAlias)
          .removeAttribute(Attribute::NonNull)
          .hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
      CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
    return false;

  // Only tail call if the following instruction is a standard return.
  // Debug instructions are skipped; an already-tail-call successor also
  // disqualifies this position.
  auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
  if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
    return false;

  return true;
}
520 
// Emit a call to the external symbol \p Name using calling convention \p CC,
// with \p Result as the return and \p Args as the arguments. Returns
// UnableToLegalize if call lowering fails.
LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args,
                    const CallingConv::ID CC) {
  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = CC;
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = Result;
  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  return LegalizerHelper::Legalized;
}
538 
539 LegalizerHelper::LegalizeResult
540 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
541                     const CallLowering::ArgInfo &Result,
542                     ArrayRef<CallLowering::ArgInfo> Args) {
543   auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
544   const char *Name = TLI.getLibcallName(Libcall);
545   const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
546   return createLibcall(MIRBuilder, Name, Result, Args, CC);
547 }
548 
// Useful for libcalls where all operands have the same type.
// Operand 0 of \p MI is taken as the result register; all remaining operands
// become arguments of type \p OpType.
static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
              Type *OpType) {
  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);

  SmallVector<CallLowering::ArgInfo, 3> Args;
  for (unsigned i = 1; i < MI.getNumOperands(); i++)
    Args.push_back({MI.getOperand(i).getReg(), OpType});
  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
                       Args);
}
561 
562 LegalizerHelper::LegalizeResult
563 llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
564                        MachineInstr &MI) {
565   auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
566 
567   SmallVector<CallLowering::ArgInfo, 3> Args;
568   // Add all the args, except for the last which is an imm denoting 'tail'.
569   for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
570     Register Reg = MI.getOperand(i).getReg();
571 
572     // Need derive an IR type for call lowering.
573     LLT OpLLT = MRI.getType(Reg);
574     Type *OpTy = nullptr;
575     if (OpLLT.isPointer())
576       OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
577     else
578       OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
579     Args.push_back({Reg, OpTy});
580   }
581 
582   auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
583   auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
584   RTLIB::Libcall RTLibcall;
585   switch (MI.getOpcode()) {
586   case TargetOpcode::G_MEMCPY:
587     RTLibcall = RTLIB::MEMCPY;
588     break;
589   case TargetOpcode::G_MEMMOVE:
590     RTLibcall = RTLIB::MEMMOVE;
591     break;
592   case TargetOpcode::G_MEMSET:
593     RTLibcall = RTLIB::MEMSET;
594     break;
595   default:
596     return LegalizerHelper::UnableToLegalize;
597   }
598   const char *Name = TLI.getLibcallName(RTLibcall);
599 
600   CallLowering::CallLoweringInfo Info;
601   Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
602   Info.Callee = MachineOperand::CreateES(Name);
603   Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
604   Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
605                     isLibCallInTailPosition(MIRBuilder.getTII(), MI);
606 
607   std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
608   if (!CLI.lowerCall(MIRBuilder, Info))
609     return LegalizerHelper::UnableToLegalize;
610 
611   if (Info.LoweredTailCall) {
612     assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
613     // We must have a return following the call (or debug insts) to get past
614     // isLibCallInTailPosition.
615     do {
616       MachineInstr *Next = MI.getNextNode();
617       assert(Next && (Next->isReturn() || Next->isDebugInstr()) &&
618              "Expected instr following MI to be return or debug inst?");
619       // We lowered a tail call, so the call is now the return from the block.
620       // Delete the old return.
621       Next->eraseFromParent();
622     } while (MI.getNextNode());
623   }
624 
625   return LegalizerHelper::Legalized;
626 }
627 
628 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
629                                        Type *FromType) {
630   auto ToMVT = MVT::getVT(ToType);
631   auto FromMVT = MVT::getVT(FromType);
632 
633   switch (Opcode) {
634   case TargetOpcode::G_FPEXT:
635     return RTLIB::getFPEXT(FromMVT, ToMVT);
636   case TargetOpcode::G_FPTRUNC:
637     return RTLIB::getFPROUND(FromMVT, ToMVT);
638   case TargetOpcode::G_FPTOSI:
639     return RTLIB::getFPTOSINT(FromMVT, ToMVT);
640   case TargetOpcode::G_FPTOUI:
641     return RTLIB::getFPTOUINT(FromMVT, ToMVT);
642   case TargetOpcode::G_SITOFP:
643     return RTLIB::getSINTTOFP(FromMVT, ToMVT);
644   case TargetOpcode::G_UITOFP:
645     return RTLIB::getUINTTOFP(FromMVT, ToMVT);
646   }
647   llvm_unreachable("Unsupported libcall function");
648 }
649 
650 static LegalizerHelper::LegalizeResult
651 conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
652                   Type *FromType) {
653   RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
654   return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
655                        {{MI.getOperand(1).getReg(), FromType}});
656 }
657 
// Replace \p MI with a libcall, dispatching on the opcode to pick the IR
// types the call lowering needs. On success the original instruction is
// erased and Legalized is returned.
LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI) {
  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = LLTy.getSizeInBits();
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  // Integer ops: all operands share one integer type of the result width.
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    Type *HLTy = IntegerType::get(Ctx, Size);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  // FP ops: all operands share the float type matching the result width.
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
    Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
    if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
      LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
      return UnableToLegalize;
    }
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  // FP-to-FP conversions: source and result float types may differ.
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC: {
    Type *FromTy = getFloatTypeForLLT(Ctx,  MRI.getType(MI.getOperand(1).getReg()));
    Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
    if (!FromTy || !ToTy)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy );
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
        FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
        FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET: {
    // NOTE(review): MI is erased even when createMemLibcall fails — confirm
    // callers tolerate UnableToLegalize with the instruction already removed.
    LegalizeResult Result = createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI);
    MI.eraseFromParent();
    return Result;
  }
  }

  MI.eraseFromParent();
  return Legalized;
}
763 
764 LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
765                                                               unsigned TypeIdx,
766                                                               LLT NarrowTy) {
767   uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
768   uint64_t NarrowSize = NarrowTy.getSizeInBits();
769 
770   switch (MI.getOpcode()) {
771   default:
772     return UnableToLegalize;
773   case TargetOpcode::G_IMPLICIT_DEF: {
774     Register DstReg = MI.getOperand(0).getReg();
775     LLT DstTy = MRI.getType(DstReg);
776 
777     // If SizeOp0 is not an exact multiple of NarrowSize, emit
778     // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
779     // FIXME: Although this would also be legal for the general case, it causes
780     //  a lot of regressions in the emitted code (superfluous COPYs, artifact
781     //  combines not being hit). This seems to be a problem related to the
782     //  artifact combiner.
783     if (SizeOp0 % NarrowSize != 0) {
784       LLT ImplicitTy = NarrowTy;
785       if (DstTy.isVector())
786         ImplicitTy = LLT::vector(DstTy.getNumElements(), ImplicitTy);
787 
788       Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
789       MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
790 
791       MI.eraseFromParent();
792       return Legalized;
793     }
794 
795     int NumParts = SizeOp0 / NarrowSize;
796 
797     SmallVector<Register, 2> DstRegs;
798     for (int i = 0; i < NumParts; ++i)
799       DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
800 
801     if (DstTy.isVector())
802       MIRBuilder.buildBuildVector(DstReg, DstRegs);
803     else
804       MIRBuilder.buildMerge(DstReg, DstRegs);
805     MI.eraseFromParent();
806     return Legalized;
807   }
808   case TargetOpcode::G_CONSTANT: {
809     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
810     const APInt &Val = MI.getOperand(1).getCImm()->getValue();
811     unsigned TotalSize = Ty.getSizeInBits();
812     unsigned NarrowSize = NarrowTy.getSizeInBits();
813     int NumParts = TotalSize / NarrowSize;
814 
815     SmallVector<Register, 4> PartRegs;
816     for (int I = 0; I != NumParts; ++I) {
817       unsigned Offset = I * NarrowSize;
818       auto K = MIRBuilder.buildConstant(NarrowTy,
819                                         Val.lshr(Offset).trunc(NarrowSize));
820       PartRegs.push_back(K.getReg(0));
821     }
822 
823     LLT LeftoverTy;
824     unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
825     SmallVector<Register, 1> LeftoverRegs;
826     if (LeftoverBits != 0) {
827       LeftoverTy = LLT::scalar(LeftoverBits);
828       auto K = MIRBuilder.buildConstant(
829         LeftoverTy,
830         Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
831       LeftoverRegs.push_back(K.getReg(0));
832     }
833 
834     insertParts(MI.getOperand(0).getReg(),
835                 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
836 
837     MI.eraseFromParent();
838     return Legalized;
839   }
840   case TargetOpcode::G_SEXT:
841   case TargetOpcode::G_ZEXT:
842   case TargetOpcode::G_ANYEXT:
843     return narrowScalarExt(MI, TypeIdx, NarrowTy);
844   case TargetOpcode::G_TRUNC: {
845     if (TypeIdx != 1)
846       return UnableToLegalize;
847 
848     uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
849     if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
850       LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
851       return UnableToLegalize;
852     }
853 
854     auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
855     MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
856     MI.eraseFromParent();
857     return Legalized;
858   }
859 
860   case TargetOpcode::G_FREEZE:
861     return reduceOperationWidth(MI, TypeIdx, NarrowTy);
862   case TargetOpcode::G_ADD:
863   case TargetOpcode::G_SUB:
864   case TargetOpcode::G_SADDO:
865   case TargetOpcode::G_SSUBO:
866   case TargetOpcode::G_SADDE:
867   case TargetOpcode::G_SSUBE:
868   case TargetOpcode::G_UADDO:
869   case TargetOpcode::G_USUBO:
870   case TargetOpcode::G_UADDE:
871   case TargetOpcode::G_USUBE:
872     return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
873   case TargetOpcode::G_MUL:
874   case TargetOpcode::G_UMULH:
875     return narrowScalarMul(MI, NarrowTy);
876   case TargetOpcode::G_EXTRACT:
877     return narrowScalarExtract(MI, TypeIdx, NarrowTy);
878   case TargetOpcode::G_INSERT:
879     return narrowScalarInsert(MI, TypeIdx, NarrowTy);
880   case TargetOpcode::G_LOAD: {
881     auto &MMO = **MI.memoperands_begin();
882     Register DstReg = MI.getOperand(0).getReg();
883     LLT DstTy = MRI.getType(DstReg);
884     if (DstTy.isVector())
885       return UnableToLegalize;
886 
887     if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
888       Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
889       MIRBuilder.buildLoad(TmpReg, MI.getOperand(1), MMO);
890       MIRBuilder.buildAnyExt(DstReg, TmpReg);
891       MI.eraseFromParent();
892       return Legalized;
893     }
894 
895     return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
896   }
897   case TargetOpcode::G_ZEXTLOAD:
898   case TargetOpcode::G_SEXTLOAD: {
899     bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
900     Register DstReg = MI.getOperand(0).getReg();
901     Register PtrReg = MI.getOperand(1).getReg();
902 
903     Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
904     auto &MMO = **MI.memoperands_begin();
905     unsigned MemSize = MMO.getSizeInBits();
906 
907     if (MemSize == NarrowSize) {
908       MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
909     } else if (MemSize < NarrowSize) {
910       MIRBuilder.buildLoadInstr(MI.getOpcode(), TmpReg, PtrReg, MMO);
911     } else if (MemSize > NarrowSize) {
912       // FIXME: Need to split the load.
913       return UnableToLegalize;
914     }
915 
916     if (ZExt)
917       MIRBuilder.buildZExt(DstReg, TmpReg);
918     else
919       MIRBuilder.buildSExt(DstReg, TmpReg);
920 
921     MI.eraseFromParent();
922     return Legalized;
923   }
924   case TargetOpcode::G_STORE: {
925     const auto &MMO = **MI.memoperands_begin();
926 
927     Register SrcReg = MI.getOperand(0).getReg();
928     LLT SrcTy = MRI.getType(SrcReg);
929     if (SrcTy.isVector())
930       return UnableToLegalize;
931 
932     int NumParts = SizeOp0 / NarrowSize;
933     unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
934     unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
935     if (SrcTy.isVector() && LeftoverBits != 0)
936       return UnableToLegalize;
937 
938     if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
939       Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
940       auto &MMO = **MI.memoperands_begin();
941       MIRBuilder.buildTrunc(TmpReg, SrcReg);
942       MIRBuilder.buildStore(TmpReg, MI.getOperand(1), MMO);
943       MI.eraseFromParent();
944       return Legalized;
945     }
946 
947     return reduceLoadStoreWidth(MI, 0, NarrowTy);
948   }
949   case TargetOpcode::G_SELECT:
950     return narrowScalarSelect(MI, TypeIdx, NarrowTy);
951   case TargetOpcode::G_AND:
952   case TargetOpcode::G_OR:
953   case TargetOpcode::G_XOR: {
954     // Legalize bitwise operation:
955     // A = BinOp<Ty> B, C
956     // into:
957     // B1, ..., BN = G_UNMERGE_VALUES B
958     // C1, ..., CN = G_UNMERGE_VALUES C
959     // A1 = BinOp<Ty/N> B1, C2
960     // ...
961     // AN = BinOp<Ty/N> BN, CN
962     // A = G_MERGE_VALUES A1, ..., AN
963     return narrowScalarBasic(MI, TypeIdx, NarrowTy);
964   }
965   case TargetOpcode::G_SHL:
966   case TargetOpcode::G_LSHR:
967   case TargetOpcode::G_ASHR:
968     return narrowScalarShift(MI, TypeIdx, NarrowTy);
969   case TargetOpcode::G_CTLZ:
970   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
971   case TargetOpcode::G_CTTZ:
972   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
973   case TargetOpcode::G_CTPOP:
974     if (TypeIdx == 1)
975       switch (MI.getOpcode()) {
976       case TargetOpcode::G_CTLZ:
977       case TargetOpcode::G_CTLZ_ZERO_UNDEF:
978         return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
979       case TargetOpcode::G_CTTZ:
980       case TargetOpcode::G_CTTZ_ZERO_UNDEF:
981         return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
982       case TargetOpcode::G_CTPOP:
983         return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
984       default:
985         return UnableToLegalize;
986       }
987 
988     Observer.changingInstr(MI);
989     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
990     Observer.changedInstr(MI);
991     return Legalized;
992   case TargetOpcode::G_INTTOPTR:
993     if (TypeIdx != 1)
994       return UnableToLegalize;
995 
996     Observer.changingInstr(MI);
997     narrowScalarSrc(MI, NarrowTy, 1);
998     Observer.changedInstr(MI);
999     return Legalized;
1000   case TargetOpcode::G_PTRTOINT:
1001     if (TypeIdx != 0)
1002       return UnableToLegalize;
1003 
1004     Observer.changingInstr(MI);
1005     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1006     Observer.changedInstr(MI);
1007     return Legalized;
1008   case TargetOpcode::G_PHI: {
1009     unsigned NumParts = SizeOp0 / NarrowSize;
1010     SmallVector<Register, 2> DstRegs(NumParts);
1011     SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1012     Observer.changingInstr(MI);
1013     for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1014       MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1015       MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
1016       extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1017                    SrcRegs[i / 2]);
1018     }
1019     MachineBasicBlock &MBB = *MI.getParent();
1020     MIRBuilder.setInsertPt(MBB, MI);
1021     for (unsigned i = 0; i < NumParts; ++i) {
1022       DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1023       MachineInstrBuilder MIB =
1024           MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1025       for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1026         MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1027     }
1028     MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1029     MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
1030     Observer.changedInstr(MI);
1031     MI.eraseFromParent();
1032     return Legalized;
1033   }
1034   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1035   case TargetOpcode::G_INSERT_VECTOR_ELT: {
1036     if (TypeIdx != 2)
1037       return UnableToLegalize;
1038 
1039     int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1040     Observer.changingInstr(MI);
1041     narrowScalarSrc(MI, NarrowTy, OpIdx);
1042     Observer.changedInstr(MI);
1043     return Legalized;
1044   }
1045   case TargetOpcode::G_ICMP: {
1046     uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
1047     if (NarrowSize * 2 != SrcSize)
1048       return UnableToLegalize;
1049 
1050     Observer.changingInstr(MI);
1051     Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
1052     Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
1053     MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2));
1054 
1055     Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
1056     Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
1057     MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3));
1058 
1059     CmpInst::Predicate Pred =
1060         static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1061     LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
1062 
1063     if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
1064       MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
1065       MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
1066       MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
1067       MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1068       MIRBuilder.buildICmp(Pred, MI.getOperand(0), Or, Zero);
1069     } else {
1070       MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
1071       MachineInstrBuilder CmpHEQ =
1072           MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
1073       MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
1074           ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
1075       MIRBuilder.buildSelect(MI.getOperand(0), CmpHEQ, CmpLU, CmpH);
1076     }
1077     Observer.changedInstr(MI);
1078     MI.eraseFromParent();
1079     return Legalized;
1080   }
1081   case TargetOpcode::G_SEXT_INREG: {
1082     if (TypeIdx != 0)
1083       return UnableToLegalize;
1084 
1085     int64_t SizeInBits = MI.getOperand(2).getImm();
1086 
1087     // So long as the new type has more bits than the bits we're extending we
1088     // don't need to break it apart.
1089     if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1090       Observer.changingInstr(MI);
1091       // We don't lose any non-extension bits by truncating the src and
1092       // sign-extending the dst.
1093       MachineOperand &MO1 = MI.getOperand(1);
1094       auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1095       MO1.setReg(TruncMIB.getReg(0));
1096 
1097       MachineOperand &MO2 = MI.getOperand(0);
1098       Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1099       MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1100       MIRBuilder.buildSExt(MO2, DstExt);
1101       MO2.setReg(DstExt);
1102       Observer.changedInstr(MI);
1103       return Legalized;
1104     }
1105 
1106     // Break it apart. Components below the extension point are unmodified. The
1107     // component containing the extension point becomes a narrower SEXT_INREG.
1108     // Components above it are ashr'd from the component containing the
1109     // extension point.
1110     if (SizeOp0 % NarrowSize != 0)
1111       return UnableToLegalize;
1112     int NumParts = SizeOp0 / NarrowSize;
1113 
1114     // List the registers where the destination will be scattered.
1115     SmallVector<Register, 2> DstRegs;
1116     // List the registers where the source will be split.
1117     SmallVector<Register, 2> SrcRegs;
1118 
1119     // Create all the temporary registers.
1120     for (int i = 0; i < NumParts; ++i) {
1121       Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1122 
1123       SrcRegs.push_back(SrcReg);
1124     }
1125 
1126     // Explode the big arguments into smaller chunks.
1127     MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
1128 
1129     Register AshrCstReg =
1130         MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1131             .getReg(0);
1132     Register FullExtensionReg = 0;
1133     Register PartialExtensionReg = 0;
1134 
1135     // Do the operation on each small part.
1136     for (int i = 0; i < NumParts; ++i) {
1137       if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
1138         DstRegs.push_back(SrcRegs[i]);
1139       else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
1140         assert(PartialExtensionReg &&
1141                "Expected to visit partial extension before full");
1142         if (FullExtensionReg) {
1143           DstRegs.push_back(FullExtensionReg);
1144           continue;
1145         }
1146         DstRegs.push_back(
1147             MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1148                 .getReg(0));
1149         FullExtensionReg = DstRegs.back();
1150       } else {
1151         DstRegs.push_back(
1152             MIRBuilder
1153                 .buildInstr(
1154                     TargetOpcode::G_SEXT_INREG, {NarrowTy},
1155                     {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1156                 .getReg(0));
1157         PartialExtensionReg = DstRegs.back();
1158       }
1159     }
1160 
1161     // Gather the destination registers into the final destination.
1162     Register DstReg = MI.getOperand(0).getReg();
1163     MIRBuilder.buildMerge(DstReg, DstRegs);
1164     MI.eraseFromParent();
1165     return Legalized;
1166   }
1167   case TargetOpcode::G_BSWAP:
1168   case TargetOpcode::G_BITREVERSE: {
1169     if (SizeOp0 % NarrowSize != 0)
1170       return UnableToLegalize;
1171 
1172     Observer.changingInstr(MI);
1173     SmallVector<Register, 2> SrcRegs, DstRegs;
1174     unsigned NumParts = SizeOp0 / NarrowSize;
1175     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
1176 
1177     for (unsigned i = 0; i < NumParts; ++i) {
1178       auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1179                                            {SrcRegs[NumParts - 1 - i]});
1180       DstRegs.push_back(DstPart.getReg(0));
1181     }
1182 
1183     MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
1184 
1185     Observer.changedInstr(MI);
1186     MI.eraseFromParent();
1187     return Legalized;
1188   }
1189   case TargetOpcode::G_PTR_ADD:
1190   case TargetOpcode::G_PTRMASK: {
1191     if (TypeIdx != 1)
1192       return UnableToLegalize;
1193     Observer.changingInstr(MI);
1194     narrowScalarSrc(MI, NarrowTy, 2);
1195     Observer.changedInstr(MI);
1196     return Legalized;
1197   }
1198   case TargetOpcode::G_FPTOUI: {
1199     if (TypeIdx != 0)
1200       return UnableToLegalize;
1201     Observer.changingInstr(MI);
1202     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1203     Observer.changedInstr(MI);
1204     return Legalized;
1205   }
1206   case TargetOpcode::G_FPTOSI: {
1207     if (TypeIdx != 0)
1208       return UnableToLegalize;
1209     Observer.changingInstr(MI);
1210     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_SEXT);
1211     Observer.changedInstr(MI);
1212     return Legalized;
1213   }
1214   case TargetOpcode::G_FPEXT:
1215     if (TypeIdx != 0)
1216       return UnableToLegalize;
1217     Observer.changingInstr(MI);
1218     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
1219     Observer.changedInstr(MI);
1220     return Legalized;
1221   }
1222 }
1223 
1224 Register LegalizerHelper::coerceToScalar(Register Val) {
1225   LLT Ty = MRI.getType(Val);
1226   if (Ty.isScalar())
1227     return Val;
1228 
1229   const DataLayout &DL = MIRBuilder.getDataLayout();
1230   LLT NewTy = LLT::scalar(Ty.getSizeInBits());
1231   if (Ty.isPointer()) {
1232     if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
1233       return Register();
1234     return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
1235   }
1236 
1237   Register NewVal = Val;
1238 
1239   assert(Ty.isVector());
1240   LLT EltTy = Ty.getElementType();
1241   if (EltTy.isPointer())
1242     NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
1243   return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
1244 }
1245 
1246 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
1247                                      unsigned OpIdx, unsigned ExtOpcode) {
1248   MachineOperand &MO = MI.getOperand(OpIdx);
1249   auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
1250   MO.setReg(ExtB.getReg(0));
1251 }
1252 
1253 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
1254                                       unsigned OpIdx) {
1255   MachineOperand &MO = MI.getOperand(OpIdx);
1256   auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
1257   MO.setReg(ExtB.getReg(0));
1258 }
1259 
1260 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
1261                                      unsigned OpIdx, unsigned TruncOpcode) {
1262   MachineOperand &MO = MI.getOperand(OpIdx);
1263   Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1264   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1265   MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
1266   MO.setReg(DstExt);
1267 }
1268 
1269 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
1270                                       unsigned OpIdx, unsigned ExtOpcode) {
1271   MachineOperand &MO = MI.getOperand(OpIdx);
1272   Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
1273   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1274   MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
1275   MO.setReg(DstTrunc);
1276 }
1277 
1278 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
1279                                             unsigned OpIdx) {
1280   MachineOperand &MO = MI.getOperand(OpIdx);
1281   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1282   MO.setReg(widenWithUnmerge(WideTy, MO.getReg()));
1283 }
1284 
1285 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
1286                                             unsigned OpIdx) {
1287   MachineOperand &MO = MI.getOperand(OpIdx);
1288 
1289   LLT OldTy = MRI.getType(MO.getReg());
1290   unsigned OldElts = OldTy.getNumElements();
1291   unsigned NewElts = MoreTy.getNumElements();
1292 
1293   unsigned NumParts = NewElts / OldElts;
1294 
1295   // Use concat_vectors if the result is a multiple of the number of elements.
1296   if (NumParts * OldElts == NewElts) {
1297     SmallVector<Register, 8> Parts;
1298     Parts.push_back(MO.getReg());
1299 
1300     Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
1301     for (unsigned I = 1; I != NumParts; ++I)
1302       Parts.push_back(ImpDef);
1303 
1304     auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
1305     MO.setReg(Concat.getReg(0));
1306     return;
1307   }
1308 
1309   Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
1310   Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
1311   MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
1312   MO.setReg(MoreReg);
1313 }
1314 
1315 void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1316   MachineOperand &Op = MI.getOperand(OpIdx);
1317   Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
1318 }
1319 
1320 void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1321   MachineOperand &MO = MI.getOperand(OpIdx);
1322   Register CastDst = MRI.createGenericVirtualRegister(CastTy);
1323   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1324   MIRBuilder.buildBitcast(MO, CastDst);
1325   MO.setReg(CastDst);
1326 }
1327 
/// Widen the scalar source operands (type index 1) of a G_MERGE_VALUES to
/// \p WideTy, either by packing bits directly into a wide register or by
/// recombining GCD-sized pieces.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
                                        LLT WideTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Src1 = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(Src1);
  const int DstSize = DstTy.getSizeInBits();
  const int SrcSize = SrcTy.getSizeInBits();
  const int WideSize = WideTy.getSizeInBits();
  // Number of WideTy pieces needed to cover the destination (rounded up).
  const int NumMerge = (DstSize + WideSize - 1) / WideSize;

  unsigned NumOps = MI.getNumOperands();
  unsigned NumSrc = MI.getNumOperands() - 1; // All operands except the def.
  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;

  if (WideSize >= DstSize) {
    // Directly pack the bits in the target type.
    // Result = zext(Src1) | (zext(Src2) << PartSize) | ... built up
    // incrementally, then truncated / inttoptr'd into DstReg if needed.
    Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);

    for (unsigned I = 2; I != NumOps; ++I) {
      const unsigned Offset = (I - 1) * PartSize;

      Register SrcReg = MI.getOperand(I).getReg();
      assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));

      auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);

      // The final OR can write DstReg directly when no conversion follows.
      Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
        MRI.createGenericVirtualRegister(WideTy);

      auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
      auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
      MIRBuilder.buildOr(NextResult, ResultReg, Shl);
      ResultReg = NextResult;
    }

    if (WideSize > DstSize)
      MIRBuilder.buildTrunc(DstReg, ResultReg);
    else if (DstTy.isPointer())
      MIRBuilder.buildIntToPtr(DstReg, ResultReg);

    MI.eraseFromParent();
    return Legalized;
  }

  // Unmerge the original values to the GCD type, and recombine to the next
  // multiple greater than the original type.
  //
  // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
  // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
  // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
  // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
  // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
  // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
  // %12:_(s12) = G_MERGE_VALUES %10, %11
  //
  // Padding with undef if necessary:
  //
  // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
  // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
  // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
  // %7:_(s2) = G_IMPLICIT_DEF
  // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
  // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
  // %10:_(s12) = G_MERGE_VALUES %8, %9

  const int GCD = greatestCommonDivisor(SrcSize, WideSize);
  LLT GCDTy = LLT::scalar(GCD);

  // NOTE(review): Parts appears to be unused below — confirm before removal.
  SmallVector<Register, 8> Parts;
  SmallVector<Register, 8> NewMergeRegs;
  SmallVector<Register, 8> Unmerges;
  LLT WideDstTy = LLT::scalar(NumMerge * WideSize);

  // Decompose the original operands if they don't evenly divide.
  for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
    Register SrcReg = MI.getOperand(I).getReg();
    if (GCD == SrcSize) {
      Unmerges.push_back(SrcReg);
    } else {
      auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
      for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
        Unmerges.push_back(Unmerge.getReg(J));
    }
  }

  // Pad with undef to the next size that is a multiple of the requested size.
  // NOTE(review): Unmerges.size() counts GCD-sized parts, while
  // NumMerge * WideSize is a bit count; the expected bound here would be
  // (NumMerge * WideSize) / GCD — confirm against upstream before relying on
  // this padding path.
  if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
    Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
      Unmerges.push_back(UndefReg);
  }

  const int PartsPerGCD = WideSize / GCD;

  // Build merges of each piece.
  ArrayRef<Register> Slicer(Unmerges);
  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
    auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
    NewMergeRegs.push_back(Merge.getReg(0));
  }

  // A truncate may be necessary if the requested type doesn't evenly divide the
  // original result type.
  if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
    MIRBuilder.buildMerge(DstReg, NewMergeRegs);
  } else {
    auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
    MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
  }

  MI.eraseFromParent();
  return Legalized;
}
1449 
1450 Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) {
1451   Register WideReg = MRI.createGenericVirtualRegister(WideTy);
1452   LLT OrigTy = MRI.getType(OrigReg);
1453   LLT LCMTy = getLCMType(WideTy, OrigTy);
1454 
1455   const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits();
1456   const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits();
1457 
1458   Register UnmergeSrc = WideReg;
1459 
1460   // Create a merge to the LCM type, padding with undef
1461   // %0:_(<3 x s32>) = G_FOO => <4 x s32>
1462   // =>
1463   // %1:_(<4 x s32>) = G_FOO
1464   // %2:_(<4 x s32>) = G_IMPLICIT_DEF
1465   // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2
1466   // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3
1467   if (NumMergeParts > 1) {
1468     Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0);
1469     SmallVector<Register, 8> MergeParts(NumMergeParts, Undef);
1470     MergeParts[0] = WideReg;
1471     UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0);
1472   }
1473 
1474   // Unmerge to the original register and pad with dead defs.
1475   SmallVector<Register, 8> UnmergeResults(NumUnmergeParts);
1476   UnmergeResults[0] = OrigReg;
1477   for (int I = 1; I != NumUnmergeParts; ++I)
1478     UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy);
1479 
1480   MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc);
1481   return WideReg;
1482 }
1483 
/// Widen the destination scalars (type index 0) of a G_UNMERGE_VALUES to
/// \p WideTy, either by shifting/truncating from a single wide source or by
/// re-unmerging through an LCM-sized intermediate.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
                                          LLT WideTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  int NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy.isVector())
    return UnableToLegalize;

  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
  if (!DstTy.isScalar())
    return UnableToLegalize;

  if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
    // Pointers must first be converted to integers so the bits can be
    // shifted; non-integral address spaces cannot be reinterpreted.
    if (SrcTy.isPointer()) {
      const DataLayout &DL = MIRBuilder.getDataLayout();
      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
        LLVM_DEBUG(
            dbgs() << "Not casting non-integral address space integer\n");
        return UnableToLegalize;
      }

      SrcTy = LLT::scalar(SrcTy.getSizeInBits());
      SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
    }

    // Widen SrcTy to WideTy. This does not affect the result, but since the
    // user requested this size, it is probably better handled than SrcTy and
    // should reduce the total number of legalization artifacts
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      SrcTy = WideTy;
      SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
    }

    // There's no unmerge type to target. Directly extract the bits from the
    // source type: result I is trunc(Src >> (I * DstSize)).
    unsigned DstSize = DstTy.getSizeInBits();

    MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
    for (int I = 1; I != NumDst; ++I) {
      auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
      auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
      MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  // Extend the source to a wider type.
  LLT LCMTy = getLCMType(SrcTy, WideTy);

  Register WideSrc = SrcReg;
  if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
    // TODO: If this is an integral address space, cast to integer and anyext.
    if (SrcTy.isPointer()) {
      LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
      return UnableToLegalize;
    }

    WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
  }

  auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);

  // Create a sequence of unmerges and merges to the original results. Since we
  // may have widened the source, we will need to pad the results with dead defs
  // to cover the source register.
  // e.g. widen s48 to s64:
  // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
  //
  // =>
  //  %4:_(s192) = G_ANYEXT %0:_(s96)
  //  %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
  //  ; unpack to GCD type, with extra dead defs
  //  %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
  //  %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
  //  dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
  //  %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10   ; Remerge to destination
  //  %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
  const LLT GCDTy = getGCDType(WideTy, DstTy);
  const int NumUnmerge = Unmerge->getNumOperands() - 1;
  const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();

  // Directly unmerge to the destination without going through a GCD type
  // if possible
  if (PartsPerRemerge == 1) {
    const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();

    for (int I = 0; I != NumUnmerge; ++I) {
      auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

      for (int J = 0; J != PartsPerUnmerge; ++J) {
        int Idx = I * PartsPerUnmerge + J;
        if (Idx < NumDst)
          MIB.addDef(MI.getOperand(Idx).getReg());
        else {
          // Create dead def for excess components.
          MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
        }
      }

      MIB.addUse(Unmerge.getReg(I));
    }
  } else {
    // Unpack every requested-unmerge result to GCD-sized pieces, then remerge
    // PartsPerRemerge consecutive pieces into each original destination.
    SmallVector<Register, 16> Parts;
    for (int J = 0; J != NumUnmerge; ++J)
      extractGCDType(Parts, GCDTy, Unmerge.getReg(J));

    SmallVector<Register, 8> RemergeParts;
    for (int I = 0; I != NumDst; ++I) {
      for (int J = 0; J < PartsPerRemerge; ++J) {
        const int Idx = I * PartsPerRemerge + J;
        RemergeParts.emplace_back(Parts[Idx]);
      }

      MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts);
      RemergeParts.clear();
    }
  }

  MI.eraseFromParent();
  return Legalized;
}
1612 
// Widen G_EXTRACT to use \p WideTy for the type at \p TypeIdx.
//
// For TypeIdx == 0 (the extracted value), the extract is lowered to a right
// shift of the (possibly widened) source followed by a truncate. For
// TypeIdx == 1 (the source), scalar sources are simply anyext'd; vector
// sources are handled only when the extract is of a whole element at an
// element-aligned offset.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                    LLT WideTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  LLT DstTy = MRI.getType(DstReg);
  unsigned Offset = MI.getOperand(2).getImm();

  if (TypeIdx == 0) {
    if (SrcTy.isVector() || DstTy.isVector())
      return UnableToLegalize;

    SrcOp Src(SrcReg);
    if (SrcTy.isPointer()) {
      // Extracts from pointers can be handled only if they are really just
      // simple integers.
      const DataLayout &DL = MIRBuilder.getDataLayout();
      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
        return UnableToLegalize;

      LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
      Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
      SrcTy = SrcAsIntTy;
    }

    if (DstTy.isPointer())
      return UnableToLegalize;

    if (Offset == 0) {
      // Avoid a shift in the degenerate case.
      MIRBuilder.buildTrunc(DstReg,
                            MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
      MI.eraseFromParent();
      return Legalized;
    }

    // Do a shift in the source type.
    LLT ShiftTy = SrcTy;
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      // Source is narrower than the requested type; extend it so the shift
      // result already has the wide type.
      Src = MIRBuilder.buildAnyExt(WideTy, Src);
      ShiftTy = WideTy;
    }

    auto LShr = MIRBuilder.buildLShr(
      ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
    MIRBuilder.buildTrunc(DstReg, LShr);
    MI.eraseFromParent();
    return Legalized;
  }

  if (SrcTy.isScalar()) {
    // Scalar source: the high bits of the source don't affect the extracted
    // range, so an anyext is sufficient.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }

  if (!SrcTy.isVector())
    return UnableToLegalize;

  // Only handle extracts of a single whole element.
  if (DstTy != SrcTy.getElementType())
    return UnableToLegalize;

  // Only handle element-aligned offsets.
  if (Offset % SrcTy.getScalarSizeInBits() != 0)
    return UnableToLegalize;

  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

  // Scale the bit offset by the size ratio of the widened vector (assumes
  // WideTy keeps the element count, so this is the element-size ratio —
  // the extracted element moves proportionally).
  MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
                          Offset);
  widenScalarDst(MI, WideTy.getScalarType(), 0);
  Observer.changedInstr(MI);
  return Legalized;
}
1690 
1691 LegalizerHelper::LegalizeResult
1692 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
1693                                    LLT WideTy) {
1694   if (TypeIdx != 0 || WideTy.isVector())
1695     return UnableToLegalize;
1696   Observer.changingInstr(MI);
1697   widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1698   widenScalarDst(MI, WideTy);
1699   Observer.changedInstr(MI);
1700   return Legalized;
1701 }
1702 
// Widen the result type of an overflow/carry add or sub:
// G_{S|U}ADDO, G_{S|U}SUBO, G_{S|U}ADDE, G_{S|U}SUBE.
//
// The operands are extended to \p WideTy (sign- or zero-extended to match the
// signedness of the opcode), and the arithmetic is done in the wide type.
// Overflow is detected by truncating the wide result back to the original
// width, re-extending it, and comparing to the wide result: the two differ
// exactly when the narrow operation would have overflowed.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
                                           LLT WideTy) {
  if (TypeIdx == 1)
    return UnableToLegalize; // TODO

  unsigned Opcode;
  unsigned ExtOpcode;
  Optional<Register> CarryIn = None;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case TargetOpcode::G_SADDO:
    Opcode = TargetOpcode::G_ADD;
    ExtOpcode = TargetOpcode::G_SEXT;
    break;
  case TargetOpcode::G_SSUBO:
    Opcode = TargetOpcode::G_SUB;
    ExtOpcode = TargetOpcode::G_SEXT;
    break;
  case TargetOpcode::G_UADDO:
    Opcode = TargetOpcode::G_ADD;
    ExtOpcode = TargetOpcode::G_ZEXT;
    break;
  case TargetOpcode::G_USUBO:
    Opcode = TargetOpcode::G_SUB;
    ExtOpcode = TargetOpcode::G_ZEXT;
    break;
  // The signed carry variants use the unsigned carry opcode for the wide
  // operation; signedness only matters for the operand extension and the
  // overflow compare below.
  case TargetOpcode::G_SADDE:
    Opcode = TargetOpcode::G_UADDE;
    ExtOpcode = TargetOpcode::G_SEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_SSUBE:
    Opcode = TargetOpcode::G_USUBE;
    ExtOpcode = TargetOpcode::G_SEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_UADDE:
    Opcode = TargetOpcode::G_UADDE;
    ExtOpcode = TargetOpcode::G_ZEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_USUBE:
    Opcode = TargetOpcode::G_USUBE;
    ExtOpcode = TargetOpcode::G_ZEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  }

  auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
  auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
  // Do the arithmetic in the larger type.
  Register NewOp;
  if (CarryIn) {
    // The wide carry op still needs a carry-out def; it goes unused — the
    // real carry-out is recomputed by the compare below.
    LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
    NewOp = MIRBuilder
                .buildInstr(Opcode, {WideTy, CarryOutTy},
                            {LHSExt, RHSExt, *CarryIn})
                .getReg(0);
  } else {
    NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
  }
  LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
  auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
  auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
  // There is no overflow if the ExtOp is the same as NewOp.
  MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
  // Now trunc the NewOp to the original result.
  MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
  MI.eraseFromParent();
  return Legalized;
}
1776 
1777 LegalizerHelper::LegalizeResult
1778 LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
1779                                          LLT WideTy) {
1780   bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
1781                   MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
1782                   MI.getOpcode() == TargetOpcode::G_SSHLSAT;
1783   bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
1784                  MI.getOpcode() == TargetOpcode::G_USHLSAT;
1785   // We can convert this to:
1786   //   1. Any extend iN to iM
1787   //   2. SHL by M-N
1788   //   3. [US][ADD|SUB|SHL]SAT
1789   //   4. L/ASHR by M-N
1790   //
1791   // It may be more efficient to lower this to a min and a max operation in
1792   // the higher precision arithmetic if the promoted operation isn't legal,
1793   // but this decision is up to the target's lowering request.
1794   Register DstReg = MI.getOperand(0).getReg();
1795 
1796   unsigned NewBits = WideTy.getScalarSizeInBits();
1797   unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
1798 
1799   // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
1800   // must not left shift the RHS to preserve the shift amount.
1801   auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
1802   auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
1803                      : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
1804   auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
1805   auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
1806   auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
1807 
1808   auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
1809                                         {ShiftL, ShiftR}, MI.getFlags());
1810 
1811   // Use a shift that will preserve the number of sign bits when the trunc is
1812   // folded away.
1813   auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
1814                          : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
1815 
1816   MIRBuilder.buildTrunc(DstReg, Result);
1817   MI.eraseFromParent();
1818   return Legalized;
1819 }
1820 
// Attempt to widen the scalar type at \p TypeIdx of \p MI to \p WideTy.
// Depending on the opcode this either mutates the instruction in place
// (extending sources / truncating the result via widenScalarSrc /
// widenScalarDst) or replaces it with an equivalent wide sequence.
// Returns UnableToLegalize for unhandled opcode/TypeIdx combinations.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_EXTRACT:
    return widenScalarExtract(MI, TypeIdx, WideTy);
  case TargetOpcode::G_INSERT:
    return widenScalarInsert(MI, TypeIdx, WideTy);
  case TargetOpcode::G_MERGE_VALUES:
    return widenScalarMergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UNMERGE_VALUES:
    return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
  case TargetOpcode::G_SADDSAT:
  case TargetOpcode::G_SSUBSAT:
  case TargetOpcode::G_SSHLSAT:
  case TargetOpcode::G_UADDSAT:
  case TargetOpcode::G_USUBSAT:
  case TargetOpcode::G_USHLSAT:
    return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP: {
    if (TypeIdx == 0) {
      Observer.changingInstr(MI);
      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    Register SrcReg = MI.getOperand(1).getReg();

    // First ZEXT the input.
    auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
    LLT CurTy = MRI.getType(SrcReg);
    if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
      // The count is the same in the larger type except if the original
      // value was zero.  This can be handled by setting the bit just off
      // the top of the original type.
      auto TopBit =
          APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
      MIBSrc = MIRBuilder.buildOr(
        WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
    }

    // Perform the operation at the larger size.
    auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
    // This is already the correct result for CTPOP and CTTZs
    if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
        MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
      // The correct result is NewOp - (Difference in widety and current ty).
      unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
      MIBNewOp = MIRBuilder.buildSub(
          WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
    }

    MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_BSWAP: {
    Observer.changingInstr(MI);
    Register DstReg = MI.getOperand(0).getReg();

    Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
    Register DstExt = MRI.createGenericVirtualRegister(WideTy);
    Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

    MI.getOperand(0).setReg(DstExt);

    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

    // The swapped value lands in the high bits of the wide result; shift it
    // down before truncating to the original width.
    LLT Ty = MRI.getType(DstReg);
    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
    MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
    MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);

    MIRBuilder.buildTrunc(DstReg, ShrReg);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BITREVERSE: {
    Observer.changingInstr(MI);

    Register DstReg = MI.getOperand(0).getReg();
    LLT Ty = MRI.getType(DstReg);
    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();

    Register DstExt = MRI.createGenericVirtualRegister(WideTy);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    MI.getOperand(0).setReg(DstExt);
    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

    // As for G_BSWAP: the reversed bits sit in the high part of the wide
    // result, so shift them down before truncating.
    auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
    auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
    MIRBuilder.buildTrunc(DstReg, Shift);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FREEZE:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ADD:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SUB:
    // Perform operation at larger width (any extension is fine here, high bits
    // don't affect the result) and then truncate the result back to the
    // original type.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SHL:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
    // Signed operations need sign-extension to preserve the numeric value.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      // The extension must match the shift: sign bits for an arithmetic
      // shift, zeros for a logical shift.
      unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
        TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;

      widenScalarSrc(MI, WideTy, 1, CvtOp);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
    // Unsigned operations need zero-extension to preserve the numeric value.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SELECT:
    Observer.changingInstr(MI);
    if (TypeIdx == 0) {
      // Perform operation at larger width (any extension is fine here, high
      // bits don't affect the result) and then truncate the result back to the
      // original type.
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
      // Explicit extension is required here since high bits affect the result.
      widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    Observer.changingInstr(MI);

    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_SITOFP:
    Observer.changingInstr(MI);

    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    else
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UITOFP:
    Observer.changingInstr(MI);

    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    else
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_STORE: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    if (!Ty.isScalar())
      return UnableToLegalize;

    Observer.changingInstr(MI);

    // i1 stores are zero-extended so the stored memory value is canonical;
    // anything wider can use anyext since only the low bits are stored.
    unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
      TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
    widenScalarSrc(MI, WideTy, 0, ExtType);

    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    // The target decides how the wide constant is extended.
    unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
        MRI.getType(MI.getOperand(0).getReg()));
    assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
            ExtOpc == TargetOpcode::G_ANYEXT) &&
           "Illegal Extend");
    const APInt &SrcVal = SrcMO.getCImm()->getValue();
    const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
                           ? SrcVal.sext(WideTy.getSizeInBits())
                           : SrcVal.zext(WideTy.getSizeInBits());
    Observer.changingInstr(MI);
    SrcMO.setCImm(ConstantInt::get(Ctx, Val));

    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FCONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    APFloat Val = SrcMO.getFPImm()->getValueAPF();
    bool LosesInfo;
    switch (WideTy.getSizeInBits()) {
    case 32:
      Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
                  &LosesInfo);
      break;
    case 64:
      Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
                  &LosesInfo);
      break;
    default:
      return UnableToLegalize;
    }

    assert(!LosesInfo && "extend should always be lossless");

    Observer.changingInstr(MI);
    SrcMO.setFPImm(ConstantFP::get(Ctx, Val));

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_IMPLICIT_DEF: {
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BRCOND:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FCMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ICMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      // The comparison operands must be extended according to the predicate's
      // signedness so the compare result is unchanged.
      unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
                               MI.getOperand(1).getPredicate()))
                               ? TargetOpcode::G_SEXT
                               : TargetOpcode::G_ZEXT;
      widenScalarSrc(MI, WideTy, 2, ExtOpcode);
      widenScalarSrc(MI, WideTy, 3, ExtOpcode);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_PTR_ADD:
    assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_PHI: {
    assert(TypeIdx == 0 && "Expecting only Idx 0");

    Observer.changingInstr(MI);
    // Each incoming value is extended at the end of its predecessor block.
    for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
    }

    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
    if (TypeIdx == 0) {
      Register VecReg = MI.getOperand(1).getReg();
      LLT VecTy = MRI.getType(VecReg);
      Observer.changingInstr(MI);

      widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
                                     WideTy.getSizeInBits()),
                     1, TargetOpcode::G_SEXT);

      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    if (TypeIdx != 2)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    // TODO: Probably should be zext
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    if (TypeIdx == 1) {
      Observer.changingInstr(MI);

      Register VecReg = MI.getOperand(1).getReg();
      LLT VecTy = MRI.getType(VecReg);
      LLT WideVecTy = LLT::vector(VecTy.getNumElements(), WideTy);

      widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideVecTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    if (TypeIdx == 2) {
      Observer.changingInstr(MI);
      // TODO: Probably should be zext
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
      Observer.changedInstr(MI);
      return Legalized;
    }

    return UnableToLegalize;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FMAD:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FCANONICALIZE:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
    assert(TypeIdx == 0);
    Observer.changingInstr(MI);

    // FP ops: extend all sources with fpext and truncate the result.
    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_FPOWI: {
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    // Only the FP operand is widened; the integer exponent keeps its type.
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_INTTOPTR:
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRTOINT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_BUILD_VECTOR: {
    Observer.changingInstr(MI);

    const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
    for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);

    // Avoid changing the result vector type if the source element type was
    // requested.
    if (TypeIdx == 1) {
      MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
    } else {
      widenScalarDst(MI, WideTy, 0);
    }

    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_SEXT_INREG:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRMASK: {
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  }
}
2343 
2344 static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
2345                              MachineIRBuilder &B, Register Src, LLT Ty) {
2346   auto Unmerge = B.buildUnmerge(Ty, Src);
2347   for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
2348     Pieces.push_back(Unmerge.getReg(I));
2349 }
2350 
// Lower a G_BITCAST involving vector types by unmerging the source into
// pieces and re-merging them with the destination type, inserting
// intermediate element bitcasts when the element sizes differ.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitcast(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  if (SrcTy.isVector()) {
    LLT SrcEltTy = SrcTy.getElementType();
    SmallVector<Register, 8> SrcRegs;

    if (DstTy.isVector()) {
      int NumDstElt = DstTy.getNumElements();
      int NumSrcElt = SrcTy.getNumElements();

      LLT DstEltTy = DstTy.getElementType();
      LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
      LLT SrcPartTy = SrcEltTy; // Original unmerge result type.

      // If there's an element size mismatch, insert intermediate casts to match
      // the result element type.
      if (NumSrcElt < NumDstElt) { // Source element type is larger.
        // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
        //
        // =>
        //
        // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
        // %4:_(<2 x s8>) = G_BITCAST %2
        // %5:_(<2 x s8>) = G_BITCAST %3
        // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
        DstCastTy = LLT::vector(NumDstElt / NumSrcElt, DstEltTy);
        SrcPartTy = SrcEltTy;
      } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
        //
        // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
        //
        // =>
        //
        // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
        // %4:_(s16) = G_BITCAST %2
        // %5:_(s16) = G_BITCAST %3
        // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
        SrcPartTy = LLT::vector(NumSrcElt / NumDstElt, SrcEltTy);
        DstCastTy = DstEltTy;
      }

      getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
      // Cast each piece to the destination's intermediate type in place.
      for (Register &SrcReg : SrcRegs)
        SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
    } else
      getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);

    MIRBuilder.buildMerge(Dst, SrcRegs);
    MI.eraseFromParent();
    return Legalized;
  }

  if (DstTy.isVector()) {
    // Scalar -> vector: unmerge the scalar into destination-element pieces
    // and build the vector from them.
    SmallVector<Register, 8> SrcRegs;
    getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
    MIRBuilder.buildMerge(Dst, SrcRegs);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
2418 
2419 /// Figure out the bit offset into a register when coercing a vector index for
2420 /// the wide element type. This is only for the case when promoting vector to
2421 /// one with larger elements.
2422 //
2423 ///
2424 /// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
2425 /// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
2426 static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
2427                                                    Register Idx,
2428                                                    unsigned NewEltSize,
2429                                                    unsigned OldEltSize) {
2430   const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2431   LLT IdxTy = B.getMRI()->getType(Idx);
2432 
2433   // Now figure out the amount we need to shift to get the target bits.
2434   auto OffsetMask = B.buildConstant(
2435     IdxTy, ~(APInt::getAllOnesValue(IdxTy.getSizeInBits()) << Log2EltRatio));
2436   auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
2437   return B.buildShl(IdxTy, OffsetIdx,
2438                     B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
2439 }
2440 
/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
/// is casting to a vector with a smaller element size, perform multiple element
/// extracts and merge the results. If this is coercing to a vector with larger
/// elements, index the bitcasted vector and extract the target element with bit
/// operations. This is intended to force the indexing in the native register
/// size for architectures that can dynamically index the register file.
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
                                         LLT CastTy) {
  // Only changing the source vector type (type index 1) is supported here.
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register Dst = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register Idx = MI.getOperand(2).getReg();
  LLT SrcVecTy = MRI.getType(SrcVec);
  LLT IdxTy = MRI.getType(Idx);

  LLT SrcEltTy = SrcVecTy.getElementType();
  // A scalar CastTy is treated as a single-element vector.
  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
  unsigned OldNumElts = SrcVecTy.getNumElements();

  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);

  const unsigned NewEltSize = NewEltTy.getSizeInBits();
  const unsigned OldEltSize = SrcEltTy.getSizeInBits();
  if (NewNumElts > OldNumElts) {
    // Decreasing the vector element size
    //
    // e.g. i64 = extract_vector_elt x:v2i64, y:i32
    //  =>
    //  v4i32:castx = bitcast x:v2i64
    //
    // i64 = bitcast
    //   (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
    //                       (i32 (extract_vector_elt castx, (2 * y + 1)))
    //
    // Each original element must map to a whole number of new elements.
    if (NewNumElts % OldNumElts != 0)
      return UnableToLegalize;

    // Type of the intermediate result vector.
    const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
    LLT MidTy = LLT::scalarOrVector(NewEltsPerOldElt, NewEltTy);

    auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);

    SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
    // Index of the first narrow element covered by the requested element.
    auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);

    // Extract each narrow piece that makes up the requested wide element.
    for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
      auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
      auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
      auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
      NewOps[I] = Elt.getReg(0);
    }

    // Reassemble the pieces and bitcast back to the original element type.
    auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
    MIRBuilder.buildBitcast(Dst, NewVec);
    MI.eraseFromParent();
    return Legalized;
  }

  if (NewNumElts < OldNumElts) {
    // The wide element must hold a whole number of old elements.
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;

    // This only depends on powers of 2 because we use bit tricks to figure out
    // the bit offset we need to shift to get the target element. A general
    // expansion could emit division/multiply.
    if (!isPowerOf2_32(NewEltSize / OldEltSize))
      return UnableToLegalize;

    // Increasing the vector element size.
    // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
    //
    //   =>
    //
    // %cast = G_BITCAST %vec
    // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
    // %wide_elt  = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
    // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
    // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
    // %elt_bits = G_LSHR %wide_elt, %offset_bits
    // %elt = G_TRUNC %elt_bits

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);

    // Divide to get the index in the wider element type.
    auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);

    // If CastTy is a scalar, the cast value itself is the only wide "element"
    // and no dynamic indexing is needed.
    Register WideElt = CastVec;
    if (CastTy.isVector()) {
      WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
                                                     ScaledIdx).getReg(0);
    }

    // Compute the bit offset into the register of the target element.
    Register OffsetBits = getBitcastWiderVectorElementOffset(
      MIRBuilder, Idx, NewEltSize, OldEltSize);

    // Shift the wide element to get the target element.
    auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
    MIRBuilder.buildTrunc(Dst, ExtractedBits);
    MI.eraseFromParent();
    return Legalized;
  }

  // Same element count: nothing useful to do with a bitcast here.
  return UnableToLegalize;
}
2552 
/// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits in \p
/// TargetReg, while preserving other bits in \p TargetReg.
///
/// (ZExt(InsertReg) << OffsetBits) |
///     (TargetReg & ~(LowBits(InsertReg.size()) << OffsetBits))
static Register buildBitFieldInsert(MachineIRBuilder &B,
                                    Register TargetReg, Register InsertReg,
                                    Register OffsetBits) {
  LLT TargetTy = B.getMRI()->getType(TargetReg);
  LLT InsertTy = B.getMRI()->getType(InsertReg);
  // Widen the inserted value to the target width and move it into position.
  auto ZextVal = B.buildZExt(TargetTy, InsertReg);
  auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);

  // Produce a bitmask of the value to insert
  auto EltMask = B.buildConstant(
    TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
                                   InsertTy.getSizeInBits()));
  // Shift it into position
  auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
  auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);

  // Clear out the bits in the wide element
  auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);

  // The value to insert has all zeros already, so stick it into the masked
  // wide element.
  return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
}
2580 
/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
/// is increasing the element size, perform the indexing in the target element
/// type, and use bit operations to insert at the element position. This is
/// intended for architectures that can dynamically index the register file and
/// want to force indexing in the native register size.
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
                                        LLT CastTy) {
  // Only changing the vector type (type index 0) is supported here.
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register Dst = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register Val = MI.getOperand(2).getReg();
  Register Idx = MI.getOperand(3).getReg();

  LLT VecTy = MRI.getType(Dst);
  LLT IdxTy = MRI.getType(Idx);

  LLT VecEltTy = VecTy.getElementType();
  // A scalar CastTy is treated as a single-element vector.
  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
  const unsigned NewEltSize = NewEltTy.getSizeInBits();
  const unsigned OldEltSize = VecEltTy.getSizeInBits();

  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
  unsigned OldNumElts = VecTy.getNumElements();

  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
  if (NewNumElts < OldNumElts) {
    // Increasing the element size: each wide element must hold a whole number
    // of old elements.
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;

    // This only depends on powers of 2 because we use bit tricks to figure out
    // the bit offset we need to shift to get the target element. A general
    // expansion could emit division/multiply.
    if (!isPowerOf2_32(NewEltSize / OldEltSize))
      return UnableToLegalize;

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);

    // Divide to get the index in the wider element type.
    auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);

    // Read the wide element that contains the target element. If CastTy is a
    // scalar, the cast value itself is the only "element".
    Register ExtractedElt = CastVec;
    if (CastTy.isVector()) {
      ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
                                                          ScaledIdx).getReg(0);
    }

    // Compute the bit offset into the register of the target element.
    Register OffsetBits = getBitcastWiderVectorElementOffset(
      MIRBuilder, Idx, NewEltSize, OldEltSize);

    // Splice the new value into the wide element, preserving the other bits,
    // then write the wide element back into the vector.
    Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
                                               Val, OffsetBits);
    if (CastTy.isVector()) {
      InsertedElt = MIRBuilder.buildInsertVectorElement(
        CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
    }

    MIRBuilder.buildBitcast(Dst, InsertedElt);
    MI.eraseFromParent();
    return Legalized;
  }

  // Decreasing the element size is not implemented for inserts.
  return UnableToLegalize;
}
2649 
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerLoad(MachineInstr &MI) {
  // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
  Register DstReg = MI.getOperand(0).getReg();
  Register PtrReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  auto &MMO = **MI.memoperands_begin();

  // Case 1: the result is exactly the memory width (non-extending load).
  if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
    if (MI.getOpcode() == TargetOpcode::G_LOAD) {
      // This load needs splitting into power of 2 sized loads.
      if (DstTy.isVector())
        return UnableToLegalize;
      if (isPowerOf2_32(DstTy.getSizeInBits()))
        return UnableToLegalize; // Don't know what we're being asked to do.

      // Our strategy here is to generate anyextending loads for the smaller
      // types up to next power-2 result type, and then combine the two larger
      // result values together, before truncating back down to the non-pow-2
      // type.
      // E.g. v1 = i24 load =>
      // v2 = i32 zextload (2 byte)
      // v3 = i32 load (1 byte)
      // v4 = i32 shl v3, 16
      // v5 = i32 or v4, v2
      // v1 = i24 trunc v5
      // By doing this we generate the correct truncate which should get
      // combined away as an artifact with a matching extend.
      uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
      uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;

      // Split the memory operand: low LargeSplitSize/8 bytes, then the rest.
      MachineFunction &MF = MIRBuilder.getMF();
      MachineMemOperand *LargeMMO =
        MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
      MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
        &MMO, LargeSplitSize / 8, SmallSplitSize / 8);

      LLT PtrTy = MRI.getType(PtrReg);
      unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
      LLT AnyExtTy = LLT::scalar(AnyExtSize);
      Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
      Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
      // Zero-extend the low part so the later OR doesn't see garbage in the
      // high bits.
      auto LargeLoad = MIRBuilder.buildLoadInstr(
        TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO);

      // Address of the high part: base + LargeSplitSize bytes.
      auto OffsetCst = MIRBuilder.buildConstant(
        LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
      Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
      auto SmallPtr =
        MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
      auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
                                            *SmallMMO);

      // Combine: (small << LargeSplitSize) | large, then truncate back down.
      auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
      auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
      auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
      MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
      MI.eraseFromParent();
      return Legalized;
    }

    // G_SEXTLOAD/G_ZEXTLOAD with a full-width result is just a plain load.
    MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
    MI.eraseFromParent();
    return Legalized;
  }

  // Case 2: extending load — load at memory width, then extend to DstTy.
  if (DstTy.isScalar()) {
    Register TmpReg =
      MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
    MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode");
    case TargetOpcode::G_LOAD:
      MIRBuilder.buildAnyExtOrTrunc(DstReg, TmpReg);
      break;
    case TargetOpcode::G_SEXTLOAD:
      MIRBuilder.buildSExt(DstReg, TmpReg);
      break;
    case TargetOpcode::G_ZEXTLOAD:
      MIRBuilder.buildZExt(DstReg, TmpReg);
      break;
    }

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
2740 
2741 LegalizerHelper::LegalizeResult
2742 LegalizerHelper::lowerStore(MachineInstr &MI) {
2743   // Lower a non-power of 2 store into multiple pow-2 stores.
2744   // E.g. split an i24 store into an i16 store + i8 store.
2745   // We do this by first extending the stored value to the next largest power
2746   // of 2 type, and then using truncating stores to store the components.
2747   // By doing this, likewise with G_LOAD, generate an extend that can be
2748   // artifact-combined away instead of leaving behind extracts.
2749   Register SrcReg = MI.getOperand(0).getReg();
2750   Register PtrReg = MI.getOperand(1).getReg();
2751   LLT SrcTy = MRI.getType(SrcReg);
2752   MachineMemOperand &MMO = **MI.memoperands_begin();
2753   if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
2754     return UnableToLegalize;
2755   if (SrcTy.isVector())
2756     return UnableToLegalize;
2757   if (isPowerOf2_32(SrcTy.getSizeInBits()))
2758     return UnableToLegalize; // Don't know what we're being asked to do.
2759 
2760   // Extend to the next pow-2.
2761   const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
2762   auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);
2763 
2764   // Obtain the smaller value by shifting away the larger value.
2765   uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
2766   uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
2767   auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
2768   auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
2769 
2770   // Generate the PtrAdd and truncating stores.
2771   LLT PtrTy = MRI.getType(PtrReg);
2772   auto OffsetCst = MIRBuilder.buildConstant(
2773     LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
2774   Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
2775   auto SmallPtr =
2776     MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
2777 
2778   MachineFunction &MF = MIRBuilder.getMF();
2779   MachineMemOperand *LargeMMO =
2780     MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
2781   MachineMemOperand *SmallMMO =
2782     MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
2783   MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
2784   MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
2785   MI.eraseFromParent();
2786   return Legalized;
2787 }
2788 
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
  // Apply the Bitcast legalize action: rewrite the instruction to operate on
  // CastTy via bitcastSrc/bitcastDst on the affected operands.
  switch (MI.getOpcode()) {
  case TargetOpcode::G_LOAD: {
    // Only the loaded value (type index 0) can be changed.
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    bitcastDst(MI, CastTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    // Only the stored value (type index 0) can be changed.
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    bitcastSrc(MI, CastTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_SELECT: {
    // Only the result/value type (type index 0) can be changed; the condition
    // operand keeps its type.
    if (TypeIdx != 0)
      return UnableToLegalize;

    if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
      LLVM_DEBUG(
          dbgs() << "bitcast action not implemented for vector select\n");
      return UnableToLegalize;
    }

    Observer.changingInstr(MI);
    bitcastSrc(MI, CastTy, 2);
    bitcastSrc(MI, CastTy, 3);
    bitcastDst(MI, CastTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // Bitwise ops are insensitive to the value's representation; cast both
    // sources and the destination.
    Observer.changingInstr(MI);
    bitcastSrc(MI, CastTy, 1);
    bitcastSrc(MI, CastTy, 2);
    bitcastDst(MI, CastTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
  default:
    return UnableToLegalize;
  }
}
2845 
2846 // Legalize an instruction by changing the opcode in place.
2847 void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
2848     Observer.changingInstr(MI);
2849     MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
2850     Observer.changedInstr(MI);
2851 }
2852 
2853 LegalizerHelper::LegalizeResult
2854 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
2855   using namespace TargetOpcode;
2856 
2857   switch(MI.getOpcode()) {
2858   default:
2859     return UnableToLegalize;
2860   case TargetOpcode::G_BITCAST:
2861     return lowerBitcast(MI);
2862   case TargetOpcode::G_SREM:
2863   case TargetOpcode::G_UREM: {
2864     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2865     auto Quot =
2866         MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
2867                               {MI.getOperand(1), MI.getOperand(2)});
2868 
2869     auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
2870     MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
2871     MI.eraseFromParent();
2872     return Legalized;
2873   }
2874   case TargetOpcode::G_SADDO:
2875   case TargetOpcode::G_SSUBO:
2876     return lowerSADDO_SSUBO(MI);
2877   case TargetOpcode::G_UMULH:
2878   case TargetOpcode::G_SMULH:
2879     return lowerSMULH_UMULH(MI);
2880   case TargetOpcode::G_SMULO:
2881   case TargetOpcode::G_UMULO: {
2882     // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
2883     // result.
2884     Register Res = MI.getOperand(0).getReg();
2885     Register Overflow = MI.getOperand(1).getReg();
2886     Register LHS = MI.getOperand(2).getReg();
2887     Register RHS = MI.getOperand(3).getReg();
2888     LLT Ty = MRI.getType(Res);
2889 
2890     unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
2891                           ? TargetOpcode::G_SMULH
2892                           : TargetOpcode::G_UMULH;
2893 
2894     Observer.changingInstr(MI);
2895     const auto &TII = MIRBuilder.getTII();
2896     MI.setDesc(TII.get(TargetOpcode::G_MUL));
2897     MI.RemoveOperand(1);
2898     Observer.changedInstr(MI);
2899 
2900     auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
2901     auto Zero = MIRBuilder.buildConstant(Ty, 0);
2902 
2903     // Move insert point forward so we can use the Res register if needed.
2904     MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2905 
2906     // For *signed* multiply, overflow is detected by checking:
2907     // (hi != (lo >> bitwidth-1))
2908     if (Opcode == TargetOpcode::G_SMULH) {
2909       auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
2910       auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
2911       MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
2912     } else {
2913       MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
2914     }
2915     return Legalized;
2916   }
2917   case TargetOpcode::G_FNEG: {
2918     Register Res = MI.getOperand(0).getReg();
2919     LLT Ty = MRI.getType(Res);
2920 
2921     // TODO: Handle vector types once we are able to
2922     // represent them.
2923     if (Ty.isVector())
2924       return UnableToLegalize;
2925     auto SignMask =
2926         MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
2927     Register SubByReg = MI.getOperand(1).getReg();
2928     MIRBuilder.buildXor(Res, SubByReg, SignMask);
2929     MI.eraseFromParent();
2930     return Legalized;
2931   }
2932   case TargetOpcode::G_FSUB: {
2933     Register Res = MI.getOperand(0).getReg();
2934     LLT Ty = MRI.getType(Res);
2935 
2936     // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
2937     // First, check if G_FNEG is marked as Lower. If so, we may
2938     // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
2939     if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
2940       return UnableToLegalize;
2941     Register LHS = MI.getOperand(1).getReg();
2942     Register RHS = MI.getOperand(2).getReg();
2943     Register Neg = MRI.createGenericVirtualRegister(Ty);
2944     MIRBuilder.buildFNeg(Neg, RHS);
2945     MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
2946     MI.eraseFromParent();
2947     return Legalized;
2948   }
2949   case TargetOpcode::G_FMAD:
2950     return lowerFMad(MI);
2951   case TargetOpcode::G_FFLOOR:
2952     return lowerFFloor(MI);
2953   case TargetOpcode::G_INTRINSIC_ROUND:
2954     return lowerIntrinsicRound(MI);
2955   case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
2956     // Since round even is the assumed rounding mode for unconstrained FP
2957     // operations, rint and roundeven are the same operation.
2958     changeOpcode(MI, TargetOpcode::G_FRINT);
2959     return Legalized;
2960   }
2961   case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
2962     Register OldValRes = MI.getOperand(0).getReg();
2963     Register SuccessRes = MI.getOperand(1).getReg();
2964     Register Addr = MI.getOperand(2).getReg();
2965     Register CmpVal = MI.getOperand(3).getReg();
2966     Register NewVal = MI.getOperand(4).getReg();
2967     MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
2968                                   **MI.memoperands_begin());
2969     MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
2970     MI.eraseFromParent();
2971     return Legalized;
2972   }
2973   case TargetOpcode::G_LOAD:
2974   case TargetOpcode::G_SEXTLOAD:
2975   case TargetOpcode::G_ZEXTLOAD:
2976     return lowerLoad(MI);
2977   case TargetOpcode::G_STORE:
2978     return lowerStore(MI);
2979   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2980   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2981   case TargetOpcode::G_CTLZ:
2982   case TargetOpcode::G_CTTZ:
2983   case TargetOpcode::G_CTPOP:
2984     return lowerBitCount(MI);
2985   case G_UADDO: {
2986     Register Res = MI.getOperand(0).getReg();
2987     Register CarryOut = MI.getOperand(1).getReg();
2988     Register LHS = MI.getOperand(2).getReg();
2989     Register RHS = MI.getOperand(3).getReg();
2990 
2991     MIRBuilder.buildAdd(Res, LHS, RHS);
2992     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
2993 
2994     MI.eraseFromParent();
2995     return Legalized;
2996   }
2997   case G_UADDE: {
2998     Register Res = MI.getOperand(0).getReg();
2999     Register CarryOut = MI.getOperand(1).getReg();
3000     Register LHS = MI.getOperand(2).getReg();
3001     Register RHS = MI.getOperand(3).getReg();
3002     Register CarryIn = MI.getOperand(4).getReg();
3003     LLT Ty = MRI.getType(Res);
3004 
3005     auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
3006     auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
3007     MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
3008     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
3009 
3010     MI.eraseFromParent();
3011     return Legalized;
3012   }
3013   case G_USUBO: {
3014     Register Res = MI.getOperand(0).getReg();
3015     Register BorrowOut = MI.getOperand(1).getReg();
3016     Register LHS = MI.getOperand(2).getReg();
3017     Register RHS = MI.getOperand(3).getReg();
3018 
3019     MIRBuilder.buildSub(Res, LHS, RHS);
3020     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
3021 
3022     MI.eraseFromParent();
3023     return Legalized;
3024   }
3025   case G_USUBE: {
3026     Register Res = MI.getOperand(0).getReg();
3027     Register BorrowOut = MI.getOperand(1).getReg();
3028     Register LHS = MI.getOperand(2).getReg();
3029     Register RHS = MI.getOperand(3).getReg();
3030     Register BorrowIn = MI.getOperand(4).getReg();
3031     const LLT CondTy = MRI.getType(BorrowOut);
3032     const LLT Ty = MRI.getType(Res);
3033 
3034     auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
3035     auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
3036     MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
3037 
3038     auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS);
3039     auto LHS_ULT_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS);
3040     MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
3041 
3042     MI.eraseFromParent();
3043     return Legalized;
3044   }
3045   case G_UITOFP:
3046     return lowerUITOFP(MI);
3047   case G_SITOFP:
3048     return lowerSITOFP(MI);
3049   case G_FPTOUI:
3050     return lowerFPTOUI(MI);
3051   case G_FPTOSI:
3052     return lowerFPTOSI(MI);
3053   case G_FPTRUNC:
3054     return lowerFPTRUNC(MI);
3055   case G_FPOWI:
3056     return lowerFPOWI(MI);
3057   case G_SMIN:
3058   case G_SMAX:
3059   case G_UMIN:
3060   case G_UMAX:
3061     return lowerMinMax(MI);
3062   case G_FCOPYSIGN:
3063     return lowerFCopySign(MI);
3064   case G_FMINNUM:
3065   case G_FMAXNUM:
3066     return lowerFMinNumMaxNum(MI);
3067   case G_MERGE_VALUES:
3068     return lowerMergeValues(MI);
3069   case G_UNMERGE_VALUES:
3070     return lowerUnmergeValues(MI);
3071   case TargetOpcode::G_SEXT_INREG: {
3072     assert(MI.getOperand(2).isImm() && "Expected immediate");
3073     int64_t SizeInBits = MI.getOperand(2).getImm();
3074 
3075     Register DstReg = MI.getOperand(0).getReg();
3076     Register SrcReg = MI.getOperand(1).getReg();
3077     LLT DstTy = MRI.getType(DstReg);
3078     Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
3079 
3080     auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
3081     MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
3082     MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
3083     MI.eraseFromParent();
3084     return Legalized;
3085   }
3086   case G_EXTRACT_VECTOR_ELT:
3087   case G_INSERT_VECTOR_ELT:
3088     return lowerExtractInsertVectorElt(MI);
3089   case G_SHUFFLE_VECTOR:
3090     return lowerShuffleVector(MI);
3091   case G_DYN_STACKALLOC:
3092     return lowerDynStackAlloc(MI);
3093   case G_EXTRACT:
3094     return lowerExtract(MI);
3095   case G_INSERT:
3096     return lowerInsert(MI);
3097   case G_BSWAP:
3098     return lowerBswap(MI);
3099   case G_BITREVERSE:
3100     return lowerBitreverse(MI);
3101   case G_READ_REGISTER:
3102   case G_WRITE_REGISTER:
3103     return lowerReadWriteRegister(MI);
3104   case G_UADDSAT:
3105   case G_USUBSAT: {
3106     // Try to make a reasonable guess about which lowering strategy to use. The
3107     // target can override this with custom lowering and calling the
3108     // implementation functions.
3109     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3110     if (LI.isLegalOrCustom({G_UMIN, Ty}))
3111       return lowerAddSubSatToMinMax(MI);
3112     return lowerAddSubSatToAddoSubo(MI);
3113   }
3114   case G_SADDSAT:
3115   case G_SSUBSAT: {
3116     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3117 
3118     // FIXME: It would probably make more sense to see if G_SADDO is preferred,
3119     // since it's a shorter expansion. However, we would need to figure out the
3120     // preferred boolean type for the carry out for the query.
3121     if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
3122       return lowerAddSubSatToMinMax(MI);
3123     return lowerAddSubSatToAddoSubo(MI);
3124   }
3125   case G_SSHLSAT:
3126   case G_USHLSAT:
3127     return lowerShlSat(MI);
3128   case G_ABS: {
3129     // Expand %res = G_ABS %a into:
3130     // %v1 = G_ASHR %a, scalar_size-1
3131     // %v2 = G_ADD %a, %v1
3132     // %res = G_XOR %v2, %v1
3133     LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3134     Register OpReg = MI.getOperand(1).getReg();
3135     auto ShiftAmt =
3136         MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
3137     auto Shift =
3138         MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
3139     auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
3140     MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
3141     MI.eraseFromParent();
3142     return Legalized;
3143   }
3144   case G_SELECT:
3145     return lowerSelect(MI);
3146   }
3147 }
3148 
3149 Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
3150                                                   Align MinAlign) const {
3151   // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
3152   // datalayout for the preferred alignment. Also there should be a target hook
3153   // for this to allow targets to reduce the alignment and ignore the
3154   // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
3155   // the type.
3156   return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
3157 }
3158 
3159 MachineInstrBuilder
3160 LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
3161                                       MachinePointerInfo &PtrInfo) {
3162   MachineFunction &MF = MIRBuilder.getMF();
3163   const DataLayout &DL = MIRBuilder.getDataLayout();
3164   int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
3165 
3166   unsigned AddrSpace = DL.getAllocaAddrSpace();
3167   LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3168 
3169   PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
3170   return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
3171 }
3172 
3173 static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg,
3174                                         LLT VecTy) {
3175   int64_t IdxVal;
3176   if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal)))
3177     return IdxReg;
3178 
3179   LLT IdxTy = B.getMRI()->getType(IdxReg);
3180   unsigned NElts = VecTy.getNumElements();
3181   if (isPowerOf2_32(NElts)) {
3182     APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
3183     return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
3184   }
3185 
3186   return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
3187       .getReg(0);
3188 }
3189 
3190 Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
3191                                                   Register Index) {
3192   LLT EltTy = VecTy.getElementType();
3193 
3194   // Calculate the element offset and add it to the pointer.
3195   unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
3196   assert(EltSize * 8 == EltTy.getSizeInBits() &&
3197          "Converting bits to bytes lost precision");
3198 
3199   Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy);
3200 
3201   LLT IdxTy = MRI.getType(Index);
3202   auto Mul = MIRBuilder.buildMul(IdxTy, Index,
3203                                  MIRBuilder.buildConstant(IdxTy, EltSize));
3204 
3205   LLT PtrTy = MRI.getType(VecPtr);
3206   return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
3207 }
3208 
3209 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
3210     MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
3211   Register DstReg = MI.getOperand(0).getReg();
3212   LLT DstTy = MRI.getType(DstReg);
3213   LLT LCMTy = getLCMType(DstTy, NarrowTy);
3214 
3215   unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
3216 
3217   auto NewUndef = MIRBuilder.buildUndef(NarrowTy);
3218   SmallVector<Register, 8> Parts(NumParts, NewUndef.getReg(0));
3219 
3220   buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
3221   MI.eraseFromParent();
3222   return Legalized;
3223 }
3224 
3225 // Handle splitting vector operations which need to have the same number of
3226 // elements in each type index, but each type index may have a different element
3227 // type.
3228 //
3229 // e.g.  <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
3230 //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3231 //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3232 //
3233 // Also handles some irregular breakdown cases, e.g.
3234 // e.g.  <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
3235 //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3236 //             s64 = G_SHL s64, s32
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMultiEltType(
  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
  // Only breaking down via the result type (index 0) is implemented; narrow
  // types for the other operands are derived from it below.
  if (TypeIdx != 0)
    return UnableToLegalize;

  const LLT NarrowTy0 = NarrowTyArg;
  // Element count each new instruction operates on (1 when scalarizing).
  const unsigned NewNumElts =
      NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;

  const Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT LeftoverTy0;

  // All of the operands need to have the same number of elements, so if we can
  // determine a type breakdown for the result type, we can for all of the
  // source types.
  int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
  if (NumParts < 0)
    return UnableToLegalize;

  // The new instructions are built un-inserted so that use operands from the
  // remaining source operands can still be appended on later loop iterations.
  SmallVector<MachineInstrBuilder, 4> NewInsts;

  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
  SmallVector<Register, 4> PartRegs, LeftoverRegs;

  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
    Register SrcReg = MI.getOperand(I).getReg();
    LLT SrcTyI = MRI.getType(SrcReg);
    // Same element count as the result pieces, but this operand's own scalar
    // type (e.g. a shift amount may be narrower than the shifted value).
    LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
    LLT LeftoverTyI;

    // Split this operand into the requested typed registers, and any leftover
    // required to reproduce the original type.
    if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
                      LeftoverRegs))
      return UnableToLegalize;

    if (I == 1) {
      // For the first operand, create an instruction for each part and setup
      // the result.
      for (Register PartReg : PartRegs) {
        Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
                               .addDef(PartDstReg)
                               .addUse(PartReg));
        DstRegs.push_back(PartDstReg);
      }

      for (Register LeftoverReg : LeftoverRegs) {
        Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
                               .addDef(PartDstReg)
                               .addUse(LeftoverReg));
        LeftoverDstRegs.push_back(PartDstReg);
      }
    } else {
      assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());

      // Add the newly created operand splits to the existing instructions. The
      // odd-sized pieces are ordered after the requested NarrowTyArg sized
      // pieces.
      unsigned InstCount = 0;
      for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
        NewInsts[InstCount++].addUse(PartRegs[J]);
      for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
        NewInsts[InstCount++].addUse(LeftoverRegs[J]);
    }

    // Reuse the scratch vectors for the next source operand.
    PartRegs.clear();
    LeftoverRegs.clear();
  }

  // Insert the newly built operations and rebuild the result register.
  for (auto &MIB : NewInsts)
    MIRBuilder.insertInstr(MIB);

  insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);

  MI.eraseFromParent();
  return Legalized;
}
3319 
3320 LegalizerHelper::LegalizeResult
3321 LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
3322                                           LLT NarrowTy) {
3323   if (TypeIdx != 0)
3324     return UnableToLegalize;
3325 
3326   Register DstReg = MI.getOperand(0).getReg();
3327   Register SrcReg = MI.getOperand(1).getReg();
3328   LLT DstTy = MRI.getType(DstReg);
3329   LLT SrcTy = MRI.getType(SrcReg);
3330 
3331   LLT NarrowTy0 = NarrowTy;
3332   LLT NarrowTy1;
3333   unsigned NumParts;
3334 
3335   if (NarrowTy.isVector()) {
3336     // Uneven breakdown not handled.
3337     NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
3338     if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
3339       return UnableToLegalize;
3340 
3341     NarrowTy1 = LLT::vector(NarrowTy.getNumElements(), SrcTy.getElementType());
3342   } else {
3343     NumParts = DstTy.getNumElements();
3344     NarrowTy1 = SrcTy.getElementType();
3345   }
3346 
3347   SmallVector<Register, 4> SrcRegs, DstRegs;
3348   extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
3349 
3350   for (unsigned I = 0; I < NumParts; ++I) {
3351     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
3352     MachineInstr *NewInst =
3353         MIRBuilder.buildInstr(MI.getOpcode(), {DstReg}, {SrcRegs[I]});
3354 
3355     NewInst->setFlags(MI.getFlags());
3356     DstRegs.push_back(DstReg);
3357   }
3358 
3359   if (NarrowTy.isVector())
3360     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
3361   else
3362     MIRBuilder.buildBuildVector(DstReg, DstRegs);
3363 
3364   MI.eraseFromParent();
3365   return Legalized;
3366 }
3367 
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  // Break a vector G_ICMP/G_FCMP into narrower compares and rebuild the
  // result vector from the pieces.
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(Src0Reg);

  unsigned NumParts;
  LLT NarrowTy0, NarrowTy1; // Narrow result type / narrow source type.

  if (TypeIdx == 0) {
    // The requested narrow type applies to the result; derive a source piece
    // type with the same element count but the source's scalar size.
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = DstTy.getNumElements();

    NarrowTy0 = NarrowTy;
    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
    NarrowTy1 = NarrowTy.isVector() ?
      LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
      SrcTy.getElementType();

  } else {
    // The requested narrow type applies to the sources; derive a result piece
    // type with the same element count but the result's scalar size.
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = SrcTy.getNumElements();

    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
      NarrowTy.getNumElements();
    NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
                            DstTy.getScalarSizeInBits());
    NarrowTy1 = NarrowTy;
  }

  // FIXME: Don't know how to handle the situation where the small vectors
  // aren't all the same size yet.
  if (NarrowTy1.isVector() &&
      NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
    return UnableToLegalize;

  CmpInst::Predicate Pred
    = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());

  // Split both compare operands into matching narrow pieces.
  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);

  // Emit one compare per piece; G_FCMP keeps the original instruction flags
  // (e.g. fast-math).
  for (unsigned I = 0; I < NumParts; ++I) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    DstRegs.push_back(DstReg);

    if (MI.getOpcode() == TargetOpcode::G_ICMP)
      MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
    else {
      MachineInstr *NewCmp
        = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
      NewCmp->setFlags(MI.getFlags());
    }
  }

  // Vector pieces concatenate; scalar pieces form a build_vector.
  if (NarrowTy1.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
3434 
3435 LegalizerHelper::LegalizeResult
3436 LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
3437                                            LLT NarrowTy) {
3438   Register DstReg = MI.getOperand(0).getReg();
3439   Register CondReg = MI.getOperand(1).getReg();
3440 
3441   unsigned NumParts = 0;
3442   LLT NarrowTy0, NarrowTy1;
3443 
3444   LLT DstTy = MRI.getType(DstReg);
3445   LLT CondTy = MRI.getType(CondReg);
3446   unsigned Size = DstTy.getSizeInBits();
3447 
3448   assert(TypeIdx == 0 || CondTy.isVector());
3449 
3450   if (TypeIdx == 0) {
3451     NarrowTy0 = NarrowTy;
3452     NarrowTy1 = CondTy;
3453 
3454     unsigned NarrowSize = NarrowTy0.getSizeInBits();
3455     // FIXME: Don't know how to handle the situation where the small vectors
3456     // aren't all the same size yet.
3457     if (Size % NarrowSize != 0)
3458       return UnableToLegalize;
3459 
3460     NumParts = Size / NarrowSize;
3461 
3462     // Need to break down the condition type
3463     if (CondTy.isVector()) {
3464       if (CondTy.getNumElements() == NumParts)
3465         NarrowTy1 = CondTy.getElementType();
3466       else
3467         NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
3468                                 CondTy.getScalarSizeInBits());
3469     }
3470   } else {
3471     NumParts = CondTy.getNumElements();
3472     if (NarrowTy.isVector()) {
3473       // TODO: Handle uneven breakdown.
3474       if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
3475         return UnableToLegalize;
3476 
3477       return UnableToLegalize;
3478     } else {
3479       NarrowTy0 = DstTy.getElementType();
3480       NarrowTy1 = NarrowTy;
3481     }
3482   }
3483 
3484   SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
3485   if (CondTy.isVector())
3486     extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
3487 
3488   extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
3489   extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
3490 
3491   for (unsigned i = 0; i < NumParts; ++i) {
3492     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
3493     MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
3494                            Src1Regs[i], Src2Regs[i]);
3495     DstRegs.push_back(DstReg);
3496   }
3497 
3498   if (NarrowTy0.isVector())
3499     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
3500   else
3501     MIRBuilder.buildBuildVector(DstReg, DstRegs);
3502 
3503   MI.eraseFromParent();
3504   return Legalized;
3505 }
3506 
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  // Break a vector G_PHI into NarrowTy-sized phis (plus leftover-typed phis
  // for any remainder), splitting each incoming value in its predecessor
  // block and remerging the pieces in the phi's block.
  const Register DstReg = MI.getOperand(0).getReg();
  LLT PhiTy = MRI.getType(DstReg);
  LLT LeftoverTy;

  // All of the operands need to have the same number of elements, so if we can
  // determine a type breakdown for the result type, we can for all of the
  // source types.
  int NumParts, NumLeftover;
  std::tie(NumParts, NumLeftover)
    = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
  if (NumParts < 0)
    return UnableToLegalize;

  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
  SmallVector<MachineInstrBuilder, 4> NewInsts;

  const int TotalNumParts = NumParts + NumLeftover;

  // Insert the new phis in the result block first.
  // The first NumParts phis are NarrowTy, the rest are LeftoverTy.
  for (int I = 0; I != TotalNumParts; ++I) {
    LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
    Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
    NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
                       .addDef(PartDstReg));
    if (I < NumParts)
      DstRegs.push_back(PartDstReg);
    else
      LeftoverDstRegs.push_back(PartDstReg);
  }

  // Remerge the pieces into the original wide value, after all phis in the
  // block (phis must stay grouped at the top of the block).
  MachineBasicBlock *MBB = MI.getParent();
  MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
  insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);

  SmallVector<Register, 4> PartRegs, LeftoverRegs;

  // Insert code to extract the incoming values in each predecessor block.
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
    PartRegs.clear();
    LeftoverRegs.clear();

    Register SrcReg = MI.getOperand(I).getReg();
    MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
    // The extraction code must run in the predecessor, before its
    // terminators.
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());

    // The breakdown type is already known from the destination; the returned
    // leftover type is not needed again.
    LLT Unused;
    if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
                      LeftoverRegs))
      return UnableToLegalize;

    // Add the newly created operand splits to the existing instructions. The
    // odd-sized pieces are ordered after the requested NarrowTyArg sized
    // pieces.
    for (int J = 0; J != TotalNumParts; ++J) {
      MachineInstrBuilder MIB = NewInsts[J];
      MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
      MIB.addMBB(&OpMBB);
    }
  }

  MI.eraseFromParent();
  return Legalized;
}
3573 
3574 LegalizerHelper::LegalizeResult
3575 LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
3576                                                   unsigned TypeIdx,
3577                                                   LLT NarrowTy) {
3578   if (TypeIdx != 1)
3579     return UnableToLegalize;
3580 
3581   const int NumDst = MI.getNumOperands() - 1;
3582   const Register SrcReg = MI.getOperand(NumDst).getReg();
3583   LLT SrcTy = MRI.getType(SrcReg);
3584 
3585   LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3586 
3587   // TODO: Create sequence of extracts.
3588   if (DstTy == NarrowTy)
3589     return UnableToLegalize;
3590 
3591   LLT GCDTy = getGCDType(SrcTy, NarrowTy);
3592   if (DstTy == GCDTy) {
3593     // This would just be a copy of the same unmerge.
3594     // TODO: Create extracts, pad with undef and create intermediate merges.
3595     return UnableToLegalize;
3596   }
3597 
3598   auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
3599   const int NumUnmerge = Unmerge->getNumOperands() - 1;
3600   const int PartsPerUnmerge = NumDst / NumUnmerge;
3601 
3602   for (int I = 0; I != NumUnmerge; ++I) {
3603     auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
3604 
3605     for (int J = 0; J != PartsPerUnmerge; ++J)
3606       MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
3607     MIB.addUse(Unmerge.getReg(I));
3608   }
3609 
3610   MI.eraseFromParent();
3611   return Legalized;
3612 }
3613 
// Handle FewerElementsVector for a G_BUILD_VECTOR or G_CONCAT_VECTORS that
// produces a vector.
3616 //
3617 // Create a G_BUILD_VECTOR or G_CONCAT_VECTORS of NarrowTy pieces, padding with
3618 // undef as necessary.
3619 //
3620 // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
3621 //   -> <2 x s16>
3622 //
3623 // %4:_(s16) = G_IMPLICIT_DEF
3624 // %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
3625 // %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
3626 // %7:_(<2 x s16>) = G_IMPLICIT_DEF
3627 // %8:_(<6 x s16>) = G_CONCAT_VECTORS %5, %6, %7
// %3:_(<3 x s16>), %9:_(<3 x s16>) = G_UNMERGE_VALUES %8
3629 LegalizerHelper::LegalizeResult
3630 LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
3631                                           LLT NarrowTy) {
3632   Register DstReg = MI.getOperand(0).getReg();
3633   LLT DstTy = MRI.getType(DstReg);
3634   LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
3635   LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
3636 
3637   // Break into a common type
3638   SmallVector<Register, 16> Parts;
3639   for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3640     extractGCDType(Parts, GCDTy, MI.getOperand(I).getReg());
3641 
3642   // Build the requested new merge, padding with undef.
3643   LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
3644                                   TargetOpcode::G_ANYEXT);
3645 
3646   // Pack into the original result register.
3647   buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
3648 
3649   MI.eraseFromParent();
3650   return Legalized;
3651 }
3652 
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
                                                           unsigned TypeIdx,
                                                           LLT NarrowVecTy) {
  // Narrow G_EXTRACT_VECTOR_ELT / G_INSERT_VECTOR_ELT by splitting the vector
  // into NarrowVecTy-sized subvectors and operating only on the piece that
  // contains the (constant) index.
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register InsertVal;
  bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;

  assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
  if (IsInsert)
    InsertVal = MI.getOperand(2).getReg();

  // The index is the last operand for both opcodes.
  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();

  // TODO: Handle total scalarization case.
  if (!NarrowVecTy.isVector())
    return UnableToLegalize;

  LLT VecTy = MRI.getType(SrcVec);

  // If the index is a constant, we can really break this down as you would
  // expect, and index into the target size pieces.
  int64_t IdxVal;
  if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
    // Avoid out of bounds indexing the pieces.
    if (IdxVal >= VecTy.getNumElements()) {
      MIRBuilder.buildUndef(DstReg);
      MI.eraseFromParent();
      return Legalized;
    }

    SmallVector<Register, 8> VecParts;
    LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);

    // Build a sequence of NarrowTy pieces in VecParts for this operand.
    LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
                                    TargetOpcode::G_ANYEXT);

    unsigned NewNumElts = NarrowVecTy.getNumElements();

    // PartIdx selects the subvector; NewIdx is the element index within it.
    LLT IdxTy = MRI.getType(Idx);
    int64_t PartIdx = IdxVal / NewNumElts;
    auto NewIdx =
        MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);

    if (IsInsert) {
      LLT PartTy = MRI.getType(VecParts[PartIdx]);

      // Use the adjusted index to insert into one of the subvectors.
      auto InsertPart = MIRBuilder.buildInsertVectorElement(
          PartTy, VecParts[PartIdx], InsertVal, NewIdx);
      VecParts[PartIdx] = InsertPart.getReg(0);

      // Recombine the inserted subvector with the others to reform the result
      // vector.
      buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
    } else {
      // Extract directly from the relevant subvector.
      MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  // With a variable index, we can't perform the operation in a smaller type, so
  // we're forced to expand this.
  //
  // TODO: We could emit a chain of compare/select to figure out which piece to
  // index.
  return lowerExtractInsertVectorElt(MI);
}
3725 
LegalizerHelper::LegalizeResult
LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
                                      LLT NarrowTy) {
  // Split a non-atomic, non-extending G_LOAD/G_STORE into NarrowTy-sized
  // accesses, plus one leftover-typed access if the breakdown is uneven.
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  MachineMemOperand *MMO = *MI.memoperands_begin();

  // This implementation doesn't work for atomics. Give up instead of doing
  // something invalid.
  if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
      MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
    return UnableToLegalize;

  bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
  Register ValReg = MI.getOperand(0).getReg();
  Register AddrReg = MI.getOperand(1).getReg();
  LLT ValTy = MRI.getType(ValReg);

  // Reject extending loads / truncating stores: the register size must match
  // the memory size for this splitting to be valid.
  // FIXME: Do we need a distinct NarrowMemory legalize action?
  if (ValTy.getSizeInBits() != 8 * MMO->getSize()) {
    LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
    return UnableToLegalize;
  }

  int NumParts = -1;
  int NumLeftover = -1;
  LLT LeftoverTy;
  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
  if (IsLoad) {
    // For a load only the part counts are needed here; the destination
    // registers are created inside splitTypePieces below.
    std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
  } else {
    // For a store, split the value being stored up front.
    if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
                     NarrowLeftoverRegs)) {
      NumParts = NarrowRegs.size();
      NumLeftover = NarrowLeftoverRegs.size();
    }
  }

  // NumParts stays -1 if the breakdown above failed.
  if (NumParts == -1)
    return UnableToLegalize;

  LLT PtrTy = MRI.getType(AddrReg);
  const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());

  unsigned TotalSize = ValTy.getSizeInBits();

  // Split the load/store into PartTy sized pieces starting at Offset. If this
  // is a load, return the new registers in ValRegs. For a store, each elements
  // of ValRegs should be PartTy. Returns the next offset that needs to be
  // handled.
  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
                             unsigned Offset) -> unsigned {
    MachineFunction &MF = MIRBuilder.getMF();
    unsigned PartSize = PartTy.getSizeInBits();
    for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
         Offset += PartSize, ++Idx) {
      unsigned ByteSize = PartSize / 8;
      unsigned ByteOffset = Offset / 8;
      Register NewAddrReg;

      // Address of this piece = base + ByteOffset.
      MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);

      // Derive a memory operand describing this piece of the original access.
      MachineMemOperand *NewMMO =
        MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);

      if (IsLoad) {
        Register Dst = MRI.createGenericVirtualRegister(PartTy);
        ValRegs.push_back(Dst);
        MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
      } else {
        MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
      }
    }

    return Offset;
  };

  unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);

  // Handle the rest of the register if this isn't an even type breakdown.
  if (LeftoverTy.isValid())
    splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);

  // For a load, reassemble the loaded pieces into the original wide value.
  if (IsLoad) {
    insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
                LeftoverTy, NarrowLeftoverRegs);
  }

  MI.eraseFromParent();
  return Legalized;
}
3819 
LegalizerHelper::LegalizeResult
LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned int TypeIdx,
                                      LLT NarrowTy) {
  // Split a 1-result, 1-3 source operation into NarrowTy-sized pieces.
  // Handles both narrowing (smaller scalar element type) and fewerElements
  // (smaller vector with the same element type), including uneven breakdowns
  // by padding with undef.
  assert(TypeIdx == 0 && "only one type index expected");

  const unsigned Opc = MI.getOpcode();
  const int NumOps = MI.getNumOperands() - 1;
  const Register DstReg = MI.getOperand(0).getReg();
  const unsigned Flags = MI.getFlags();
  const unsigned NarrowSize = NarrowTy.getSizeInBits();
  const LLT NarrowScalarTy = LLT::scalar(NarrowSize);

  assert(NumOps <= 3 && "expected instruction with 1 result and 1-3 sources");

  // First of all check whether we are narrowing (changing the element type)
  // or reducing the vector elements
  const LLT DstTy = MRI.getType(DstReg);
  const bool IsNarrow = NarrowTy.getScalarType() != DstTy.getScalarType();

  // ExtractedRegs[I] holds the NarrowTy pieces of source operand I.
  SmallVector<Register, 8> ExtractedRegs[3];
  SmallVector<Register, 8> Parts;

  unsigned NarrowElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;

  // Break down all the sources into NarrowTy pieces we can operate on. This may
  // involve creating merges to a wider type, padded with undef.
  for (int I = 0; I != NumOps; ++I) {
    Register SrcReg = MI.getOperand(I + 1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);

    // The type to narrow SrcReg to. For narrowing, this is a smaller scalar.
    // For fewerElements, this is a smaller vector with the same element type.
    LLT OpNarrowTy;
    if (IsNarrow) {
      OpNarrowTy = NarrowScalarTy;

      // In case of narrowing, we need to cast vectors to scalars for this to
      // work properly
      // FIXME: Can we do without the bitcast here if we're narrowing?
      if (SrcTy.isVector()) {
        SrcTy = LLT::scalar(SrcTy.getSizeInBits());
        SrcReg = MIRBuilder.buildBitcast(SrcTy, SrcReg).getReg(0);
      }
    } else {
      OpNarrowTy = LLT::scalarOrVector(NarrowElts, SrcTy.getScalarType());
    }

    LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, OpNarrowTy, SrcReg);

    // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand.
    buildLCMMergePieces(SrcTy, OpNarrowTy, GCDTy, ExtractedRegs[I],
                        TargetOpcode::G_ANYEXT);
  }

  SmallVector<Register, 8> ResultRegs;

  // Input operands for each sub-instruction.
  SmallVector<SrcOp, 4> InputRegs(NumOps, Register());

  int NumParts = ExtractedRegs[0].size();
  const unsigned DstSize = DstTy.getSizeInBits();
  const LLT DstScalarTy = LLT::scalar(DstSize);

  // Narrowing needs to use scalar types
  LLT DstLCMTy, NarrowDstTy;
  if (IsNarrow) {
    DstLCMTy = getLCMType(DstScalarTy, NarrowScalarTy);
    NarrowDstTy = NarrowScalarTy;
  } else {
    DstLCMTy = getLCMType(DstTy, NarrowTy);
    NarrowDstTy = NarrowTy;
  }

  // We widened the source registers to satisfy merge/unmerge size
  // constraints. We'll have some extra fully undef parts.
  const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize;

  for (int I = 0; I != NumRealParts; ++I) {
    // Emit this instruction on each of the split pieces.
    for (int J = 0; J != NumOps; ++J)
      InputRegs[J] = ExtractedRegs[J][I];

    auto Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy}, InputRegs, Flags);
    ResultRegs.push_back(Inst.getReg(0));
  }

  // Fill out the widened result with undef instead of creating instructions
  // with undef inputs.
  int NumUndefParts = NumParts - NumRealParts;
  if (NumUndefParts != 0)
    ResultRegs.append(NumUndefParts,
                      MIRBuilder.buildUndef(NarrowDstTy).getReg(0));

  // Extract the possibly padded result. Use a scratch register if we need to do
  // a final bitcast, otherwise use the original result register.
  Register MergeDstReg;
  if (IsNarrow && DstTy.isVector())
    MergeDstReg = MRI.createGenericVirtualRegister(DstScalarTy);
  else
    MergeDstReg = DstReg;

  buildWidenedRemergeToDst(MergeDstReg, DstLCMTy, ResultRegs);

  // Recast to vector if we narrowed a vector
  if (IsNarrow && DstTy.isVector())
    MIRBuilder.buildBitcast(DstReg, MergeDstReg);

  MI.eraseFromParent();
  return Legalized;
}
3930 
3931 LegalizerHelper::LegalizeResult
3932 LegalizerHelper::fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx,
3933                                               LLT NarrowTy) {
3934   Register DstReg = MI.getOperand(0).getReg();
3935   Register SrcReg = MI.getOperand(1).getReg();
3936   int64_t Imm = MI.getOperand(2).getImm();
3937 
3938   LLT DstTy = MRI.getType(DstReg);
3939 
3940   SmallVector<Register, 8> Parts;
3941   LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
3942   LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts);
3943 
3944   for (Register &R : Parts)
3945     R = MIRBuilder.buildSExtInReg(NarrowTy, R, Imm).getReg(0);
3946 
3947   buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
3948 
3949   MI.eraseFromParent();
3950   return Legalized;
3951 }
3952 
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  using namespace TargetOpcode;

  // Dispatch on the opcode to the narrowing strategy appropriate for its
  // operand structure. Unhandled opcodes report UnableToLegalize.
  switch (MI.getOpcode()) {
  case G_IMPLICIT_DEF:
    return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
  // Unary/binary/ternary operations handled by the generic width-reduction
  // path, which splits all operands and the result uniformly.
  case G_TRUNC:
  case G_AND:
  case G_OR:
  case G_XOR:
  case G_ADD:
  case G_SUB:
  case G_MUL:
  case G_PTR_ADD:
  case G_SMULH:
  case G_UMULH:
  case G_FADD:
  case G_FMUL:
  case G_FSUB:
  case G_FNEG:
  case G_FABS:
  case G_FCANONICALIZE:
  case G_FDIV:
  case G_FREM:
  case G_FMA:
  case G_FMAD:
  case G_FPOW:
  case G_FEXP:
  case G_FEXP2:
  case G_FLOG:
  case G_FLOG2:
  case G_FLOG10:
  case G_FNEARBYINT:
  case G_FCEIL:
  case G_FFLOOR:
  case G_FRINT:
  case G_INTRINSIC_ROUND:
  case G_INTRINSIC_ROUNDEVEN:
  case G_INTRINSIC_TRUNC:
  case G_FCOS:
  case G_FSIN:
  case G_FSQRT:
  case G_BSWAP:
  case G_BITREVERSE:
  case G_SDIV:
  case G_UDIV:
  case G_SREM:
  case G_UREM:
  case G_SMIN:
  case G_SMAX:
  case G_UMIN:
  case G_UMAX:
  case G_FMINNUM:
  case G_FMAXNUM:
  case G_FMINNUM_IEEE:
  case G_FMAXNUM_IEEE:
  case G_FMINIMUM:
  case G_FMAXIMUM:
  case G_FSHL:
  case G_FSHR:
  case G_FREEZE:
  case G_SADDSAT:
  case G_SSUBSAT:
  case G_UADDSAT:
  case G_USUBSAT:
    return reduceOperationWidth(MI, TypeIdx, NarrowTy);
  // Operations whose operands may have types differing from the result
  // (e.g. a separate shift-amount type) use the multi-element-type path.
  case G_SHL:
  case G_LSHR:
  case G_ASHR:
  case G_SSHLSAT:
  case G_USHLSAT:
  case G_CTLZ:
  case G_CTLZ_ZERO_UNDEF:
  case G_CTTZ:
  case G_CTTZ_ZERO_UNDEF:
  case G_CTPOP:
  case G_FCOPYSIGN:
    return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
  // Conversions: source and destination element types differ.
  case G_ZEXT:
  case G_SEXT:
  case G_ANYEXT:
  case G_FPEXT:
  case G_FPTRUNC:
  case G_SITOFP:
  case G_UITOFP:
  case G_FPTOSI:
  case G_FPTOUI:
  case G_INTTOPTR:
  case G_PTRTOINT:
  case G_ADDRSPACE_CAST:
    return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
  case G_ICMP:
  case G_FCMP:
    return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
  case G_SELECT:
    return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
  case G_PHI:
    return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
  case G_UNMERGE_VALUES:
    return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
  case G_BUILD_VECTOR:
    assert(TypeIdx == 0 && "not a vector type index");
    return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
  case G_CONCAT_VECTORS:
    if (TypeIdx != 1) // TODO: This probably does work as expected already.
      return UnableToLegalize;
    return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
  case G_EXTRACT_VECTOR_ELT:
  case G_INSERT_VECTOR_ELT:
    return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
  case G_LOAD:
  case G_STORE:
    return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
  case G_SEXT_INREG:
    return fewerElementsVectorSextInReg(MI, TypeIdx, NarrowTy);
  default:
    return UnableToLegalize;
  }
}
4074 
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
                                             const LLT HalfTy, const LLT AmtTy) {

  // Expand a wide shift by a known-constant amount into operations on the
  // low/high halves of the input. Since the amount is constant, we can pick
  // the exact composition statically instead of emitting selects.
  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));

  // A zero shift is the identity; just re-merge the halves.
  if (Amt.isNullValue()) {
    MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH});
    MI.eraseFromParent();
    return Legalized;
  }

  LLT NVT = HalfTy;
  unsigned NVTBits = HalfTy.getSizeInBits();
  unsigned VTBits = 2 * NVTBits;

  SrcOp Lo(Register(0)), Hi(Register(0));
  if (MI.getOpcode() == TargetOpcode::G_SHL) {
    if (Amt.ugt(VTBits)) {
      // Everything is shifted out; both halves are zero.
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      // Low half fully shifted out; high half comes from the low input half
      // shifted by the remaining amount.
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = MIRBuilder.buildShl(NVT, InL,
                               MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
    } else if (Amt == NVTBits) {
      // Shift by exactly half the width: low input half becomes high half.
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = InL;
    } else {
      // General case (Amt < NVTBits): high half combines the shifted high
      // input with bits carried up from the low input.
      Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrLHS =
          MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrRHS = MIRBuilder.buildLShr(
          NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
      Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
    }
  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    if (Amt.ugt(VTBits)) {
      // Everything is shifted out; both halves are zero.
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      // Low half comes entirely from the high input half.
      Lo = MIRBuilder.buildLShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt == NVTBits) {
      // Shift by exactly half the width: high input half becomes low half.
      Lo = InH;
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else {
      // General case: low half combines shifted low input with bits carried
      // down from the high input.
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
    }
  } else {
    // G_ASHR: same structure as G_LSHR, but the vacated high bits are filled
    // with the sign of the high input half.
    if (Amt.ugt(VTBits)) {
      Hi = Lo = MIRBuilder.buildAShr(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt.ugt(NVTBits)) {
      Lo = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt == NVTBits) {
      Lo = InH;
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else {
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
    }
  }

  MIRBuilder.buildMerge(MI.getOperand(0), {Lo, Hi});
  MI.eraseFromParent();

  return Legalized;
}
4162 
// TODO: Optimize if constant shift amount.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
                                   LLT RequestedTy) {
  // Narrowing the shift amount operand (TypeIdx 1) only requires truncating
  // that source; the shift itself is unchanged.
  if (TypeIdx == 1) {
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, RequestedTy, 2);
    Observer.changedInstr(MI);
    return Legalized;
  }

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Amt = MI.getOperand(2).getReg();
  LLT ShiftAmtTy = MRI.getType(Amt);
  const unsigned DstEltSize = DstTy.getScalarSizeInBits();
  if (DstEltSize % 2 != 0)
    return UnableToLegalize;

  // Ignore the input type. We can only go to exactly half the size of the
  // input. If that isn't small enough, the resulting pieces will be further
  // legalized.
  const unsigned NewBitSize = DstEltSize / 2;
  const LLT HalfTy = LLT::scalar(NewBitSize);
  const LLT CondTy = LLT::scalar(1);

  // A constant shift amount allows a fully static expansion.
  if (const MachineInstr *KShiftAmt =
          getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
    return narrowScalarShiftByConstant(
        MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
  }

  // TODO: Expand with known bits.

  // Handle the fully general expansion by an unknown amount.
  auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);

  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));

  // AmtExcess = Amt - NewBitSize (used when the shift crosses the half
  // boundary); AmtLack = NewBitSize - Amt (bits carried between halves).
  auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
  auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);

  auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
  // IsShort: the shift stays within one half. IsZero: guard against the
  // degenerate zero-amount case, where AmtLack would be a full-width shift.
  auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);

  Register ResultRegs[2];
  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL: {
    // Short: ShAmt < NewBitSize
    auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
    auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    auto LoL = MIRBuilder.buildConstant(HalfTy, 0);         // Lo part is zero.
    auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.

    // Select between the short and long compositions, with the zero-amount
    // case passing the input half through unchanged.
    auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
    auto Hi = MIRBuilder.buildSelect(
        HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    // Short: ShAmt < NewBitSize
    auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
    auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    MachineInstrBuilder HiL;
    if (MI.getOpcode() == TargetOpcode::G_LSHR) {
      HiL = MIRBuilder.buildConstant(HalfTy, 0);            // Hi part is zero.
    } else {
      auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
      HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt);    // Sign of Hi part.
    }
    auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
                                     {InH, AmtExcess});     // Lo from Hi part.

    auto Lo = MIRBuilder.buildSelect(
        HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));

    auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  default:
    llvm_unreachable("not a shift");
  }

  MIRBuilder.buildMerge(DstReg, ResultRegs);
  MI.eraseFromParent();
  return Legalized;
}
4273 
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
                                       LLT MoreTy) {
  assert(TypeIdx == 0 && "Expecting only Idx 0");

  Observer.changingInstr(MI);
  // Widen each incoming value in its predecessor block, inserting the
  // widening code before that block's terminator (PHI operands come in
  // (value, MBB) pairs starting at operand 1).
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
    MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
    moreElementsVectorSrc(MI, MoreTy, I);
  }

  // Widen the result in this block. NOTE(review): the insert point is set to
  // one before getFirstNonPHI — presumably so the def-rewrite code lands at
  // the end of the PHI group rather than among later instructions; confirm
  // against MachineIRBuilder::setInsertPt semantics.
  MachineBasicBlock &MBB = *MI.getParent();
  MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
  moreElementsVectorDst(MI, MoreTy, 0);
  Observer.changedInstr(MI);
  return Legalized;
}
4292 
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                    LLT MoreTy) {
  // Legalize by padding vector operands/results out to MoreTy (a wider
  // vector type) with undef elements, per opcode shape.
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_IMPLICIT_DEF:
  case TargetOpcode::G_LOAD: {
    // Only the result needs widening; the extra loaded lanes are ignored.
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_STORE:
    // Widen the stored value (operand 0 of the store).
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM: {
    // Simple binary ops: widen both sources and the destination uniformly.
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT:
    // Only the source vector (TypeIdx 1) can be widened; the extracted
    // value is unaffected.
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INSERT:
  case TargetOpcode::G_FREEZE:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_SELECT:
    // Only scalar-condition selects are handled; vector conditions would
    // need the condition widened consistently as well.
    if (TypeIdx != 0)
      return UnableToLegalize;
    if (MRI.getType(MI.getOperand(1).getReg()).isVector())
      return UnableToLegalize;

    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorSrc(MI, MoreTy, 3);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UNMERGE_VALUES: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    // Widen the unmerge source and rebuild the unmerge with extra
    // (dead) results covering the padding elements.
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    int NumDst = MI.getNumOperands() - 1;
    moreElementsVectorSrc(MI, MoreTy, NumDst);

    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
    for (int I = 0; I != NumDst; ++I)
      MIB.addDef(MI.getOperand(I).getReg());

    // Extra defs for the padding elements of the widened source.
    int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits();
    for (int I = NumDst; I != NewNumDst; ++I)
      MIB.addDef(MRI.createGenericVirtualRegister(DstTy));

    MIB.addUse(MI.getOperand(NumDst).getReg());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_PHI:
    return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
  default:
    return UnableToLegalize;
  }
}
4388 
void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
                                        ArrayRef<Register> Src1Regs,
                                        ArrayRef<Register> Src2Regs,
                                        LLT NarrowTy) {
  // Schoolbook (long) multiplication on NarrowTy-sized "digits": each result
  // part DstRegs[DstIdx] sums the low halves of partial products landing at
  // that position, the high halves of partial products from the previous
  // position, and the carries accumulated while summing the previous part.
  MachineIRBuilder &B = MIRBuilder;
  unsigned SrcParts = Src1Regs.size();
  unsigned DstParts = DstRegs.size();

  // The lowest part is just the low half of the lowest partial product.
  unsigned DstIdx = 0; // Low bits of the result.
  Register FactorSum =
      B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
  DstRegs[DstIdx] = FactorSum;

  // Carry total produced while summing the previous part; only valid once
  // DstIdx > 1 (guarded below).
  unsigned CarrySumPrevDstIdx;
  SmallVector<Register, 4> Factors;

  for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
    // Collect low parts of muls for DstIdx.
    for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
         i <= std::min(DstIdx, SrcParts - 1); ++i) {
      MachineInstrBuilder Mul =
          B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
      Factors.push_back(Mul.getReg(0));
    }
    // Collect high parts of muls from previous DstIdx.
    for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
         i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
      MachineInstrBuilder Umulh =
          B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
      Factors.push_back(Umulh.getReg(0));
    }
    // Add CarrySum from additions calculated for previous DstIdx.
    if (DstIdx != 1) {
      Factors.push_back(CarrySumPrevDstIdx);
    }

    Register CarrySum;
    // Add all factors and accumulate all carries into CarrySum.
    if (DstIdx != DstParts - 1) {
      // Use overflow-reporting adds so the carries can be propagated into
      // the next part's sum.
      MachineInstrBuilder Uaddo =
          B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
      FactorSum = Uaddo.getReg(0);
      CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i) {
        MachineInstrBuilder Uaddo =
            B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
        FactorSum = Uaddo.getReg(0);
        MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
        CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
      }
    } else {
      // Since value for the next index is not calculated, neither is CarrySum.
      FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i)
        FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
    }

    CarrySumPrevDstIdx = CarrySum;
    DstRegs[DstIdx] = FactorSum;
    Factors.clear();
  }
}
4451 
4452 LegalizerHelper::LegalizeResult
4453 LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
4454                                     LLT NarrowTy) {
4455   if (TypeIdx != 0)
4456     return UnableToLegalize;
4457 
4458   Register DstReg = MI.getOperand(0).getReg();
4459   LLT DstType = MRI.getType(DstReg);
4460   // FIXME: add support for vector types
4461   if (DstType.isVector())
4462     return UnableToLegalize;
4463 
4464   uint64_t SizeOp0 = DstType.getSizeInBits();
4465   uint64_t NarrowSize = NarrowTy.getSizeInBits();
4466 
4467   // FIXME: add support for when SizeOp0 isn't an exact multiple of
4468   // NarrowSize.
4469   if (SizeOp0 % NarrowSize != 0)
4470     return UnableToLegalize;
4471 
4472   // Expand in terms of carry-setting/consuming G_<Op>E instructions.
4473   int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
4474 
4475   unsigned Opcode = MI.getOpcode();
4476   unsigned OpO, OpE, OpF;
4477   switch (Opcode) {
4478   case TargetOpcode::G_SADDO:
4479   case TargetOpcode::G_SADDE:
4480   case TargetOpcode::G_UADDO:
4481   case TargetOpcode::G_UADDE:
4482   case TargetOpcode::G_ADD:
4483     OpO = TargetOpcode::G_UADDO;
4484     OpE = TargetOpcode::G_UADDE;
4485     OpF = TargetOpcode::G_UADDE;
4486     if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
4487       OpF = TargetOpcode::G_SADDE;
4488     break;
4489   case TargetOpcode::G_SSUBO:
4490   case TargetOpcode::G_SSUBE:
4491   case TargetOpcode::G_USUBO:
4492   case TargetOpcode::G_USUBE:
4493   case TargetOpcode::G_SUB:
4494     OpO = TargetOpcode::G_USUBO;
4495     OpE = TargetOpcode::G_USUBE;
4496     OpF = TargetOpcode::G_USUBE;
4497     if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
4498       OpF = TargetOpcode::G_SSUBE;
4499     break;
4500   default:
4501     llvm_unreachable("Unexpected add/sub opcode!");
4502   }
4503 
4504   // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
4505   unsigned NumDefs = MI.getNumExplicitDefs();
4506   Register Src1 = MI.getOperand(NumDefs).getReg();
4507   Register Src2 = MI.getOperand(NumDefs + 1).getReg();
4508   Register CarryDst;
4509   if (NumDefs == 2)
4510     CarryDst = MI.getOperand(1).getReg();
4511   Register CarryIn;
4512   if (MI.getNumOperands() == NumDefs + 3)
4513     CarryIn = MI.getOperand(NumDefs + 2).getReg();
4514 
4515   SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
4516   extractParts(Src1, NarrowTy, NumParts, Src1Regs);
4517   extractParts(Src2, NarrowTy, NumParts, Src2Regs);
4518 
4519   for (int i = 0; i < NumParts; ++i) {
4520     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
4521     Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
4522     // Forward the final carry-out to the destination register
4523     if (i == NumParts - 1 && CarryDst)
4524       CarryOut = CarryDst;
4525 
4526     if (!CarryIn) {
4527       MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
4528                             {Src1Regs[i], Src2Regs[i]});
4529     } else if (i == NumParts - 1) {
4530       MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
4531                             {Src1Regs[i], Src2Regs[i], CarryIn});
4532     } else {
4533       MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
4534                             {Src1Regs[i], Src2Regs[i], CarryIn});
4535     }
4536 
4537     DstRegs.push_back(DstReg);
4538     CarryIn = CarryOut;
4539   }
4540   MIRBuilder.buildMerge(DstReg, DstRegs);
4541   MI.eraseFromParent();
4542   return Legalized;
4543 }
4544 
4545 LegalizerHelper::LegalizeResult
4546 LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
4547   Register DstReg = MI.getOperand(0).getReg();
4548   Register Src1 = MI.getOperand(1).getReg();
4549   Register Src2 = MI.getOperand(2).getReg();
4550 
4551   LLT Ty = MRI.getType(DstReg);
4552   if (Ty.isVector())
4553     return UnableToLegalize;
4554 
4555   unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
4556   unsigned DstSize = Ty.getSizeInBits();
4557   unsigned NarrowSize = NarrowTy.getSizeInBits();
4558   if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
4559     return UnableToLegalize;
4560 
4561   unsigned NumDstParts = DstSize / NarrowSize;
4562   unsigned NumSrcParts = SrcSize / NarrowSize;
4563   bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
4564   unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);
4565 
4566   SmallVector<Register, 2> Src1Parts, Src2Parts;
4567   SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
4568   extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
4569   extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
4570   multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
4571 
4572   // Take only high half of registers if this is high mul.
4573   ArrayRef<Register> DstRegs(
4574       IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
4575   MIRBuilder.buildMerge(DstReg, DstRegs);
4576   MI.eraseFromParent();
4577   return Legalized;
4578 }
4579 
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  // Narrow the *source* (TypeIdx 1) of a G_EXTRACT: split it into NarrowTy
  // pieces and re-extract only the pieces that overlap the requested range.
  if (TypeIdx != 1)
    return UnableToLegalize;

  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
  // FIXME: add support for when SizeOp1 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp1 % NarrowSize != 0)
    return UnableToLegalize;
  int NumParts = SizeOp1 / NarrowSize;

  SmallVector<Register, 2> SrcRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  // OpReg/OpStart/OpSize describe the bit range being extracted from the
  // original wide source.
  Register OpReg = MI.getOperand(0).getReg();
  uint64_t OpStart = MI.getOperand(2).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int i = 0; i < NumParts; ++i) {
    unsigned SrcStart = i * NarrowSize;

    if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
      // No part of the extract uses this subregister, ignore it.
      continue;
    } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is extracted, forward the value.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset;
    uint64_t SegSize;
    if (OpStart < SrcStart) {
      // Extract range starts before this piece: take from its low end, up to
      // where the extract range ends.
      ExtractOffset = 0;
      SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
    } else {
      // Extract range starts inside this piece.
      ExtractOffset = OpStart - SrcStart;
      SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
    }

    Register SegReg = SrcRegs[i];
    if (ExtractOffset != 0 || SegSize != NarrowSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
    }

    DstRegs.push_back(SegReg);
  }

  // Reassemble the collected segments into the destination.
  Register DstReg = MI.getOperand(0).getReg();
  if (MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else if (DstRegs.size() > 1)
    MIRBuilder.buildMerge(DstReg, DstRegs);
  else
    MIRBuilder.buildCopy(DstReg, DstRegs[0]);
  MI.eraseFromParent();
  return Legalized;
}
4646 
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  // Narrow a G_INSERT by splitting the wide destination into NarrowTy pieces
  // and inserting the overlapping slice of the new value into each affected
  // piece.
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  // FIXME: add support for when SizeOp0 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp0 % NarrowSize != 0)
    return UnableToLegalize;

  int NumParts = SizeOp0 / NarrowSize;

  SmallVector<Register, 2> SrcRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  // OpReg/OpStart/OpSize describe the bit range being written into the wide
  // value.
  Register OpReg = MI.getOperand(2).getReg();
  uint64_t OpStart = MI.getOperand(3).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int i = 0; i < NumParts; ++i) {
    unsigned DstStart = i * NarrowSize;

    if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
      // No part of the insert affects this subregister, forward the original.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is defined by this insert, forward the new
      // value.
      DstRegs.push_back(OpReg);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset, InsertOffset;
    uint64_t SegSize;
    if (OpStart < DstStart) {
      // Inserted range begins before this piece: the slice starts partway
      // into OpReg and lands at the piece's low end.
      InsertOffset = 0;
      ExtractOffset = DstStart - OpStart;
      SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
    } else {
      // Inserted range begins inside this piece.
      InsertOffset = OpStart - DstStart;
      ExtractOffset = 0;
      SegSize =
        std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
    }

    Register SegReg = OpReg;
    if (ExtractOffset != 0 || SegSize != OpSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
    }

    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
    DstRegs.push_back(DstReg);
  }

  assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
  Register DstReg = MI.getOperand(0).getReg();
  if(MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else
    MIRBuilder.buildMerge(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}
4721 
4722 LegalizerHelper::LegalizeResult
4723 LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
4724                                    LLT NarrowTy) {
4725   Register DstReg = MI.getOperand(0).getReg();
4726   LLT DstTy = MRI.getType(DstReg);
4727 
4728   assert(MI.getNumOperands() == 3 && TypeIdx == 0);
4729 
4730   SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
4731   SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
4732   SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
4733   LLT LeftoverTy;
4734   if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
4735                     Src0Regs, Src0LeftoverRegs))
4736     return UnableToLegalize;
4737 
4738   LLT Unused;
4739   if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
4740                     Src1Regs, Src1LeftoverRegs))
4741     llvm_unreachable("inconsistent extractParts result");
4742 
4743   for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
4744     auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
4745                                         {Src0Regs[I], Src1Regs[I]});
4746     DstRegs.push_back(Inst.getReg(0));
4747   }
4748 
4749   for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
4750     auto Inst = MIRBuilder.buildInstr(
4751       MI.getOpcode(),
4752       {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
4753     DstLeftoverRegs.push_back(Inst.getReg(0));
4754   }
4755 
4756   insertParts(DstReg, DstTy, NarrowTy, DstRegs,
4757               LeftoverTy, DstLeftoverRegs);
4758 
4759   MI.eraseFromParent();
4760   return Legalized;
4761 }
4762 
4763 LegalizerHelper::LegalizeResult
4764 LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
4765                                  LLT NarrowTy) {
4766   if (TypeIdx != 0)
4767     return UnableToLegalize;
4768 
4769   Register DstReg = MI.getOperand(0).getReg();
4770   Register SrcReg = MI.getOperand(1).getReg();
4771 
4772   LLT DstTy = MRI.getType(DstReg);
4773   if (DstTy.isVector())
4774     return UnableToLegalize;
4775 
4776   SmallVector<Register, 8> Parts;
4777   LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
4778   LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
4779   buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
4780 
4781   MI.eraseFromParent();
4782   return Legalized;
4783 }
4784 
4785 LegalizerHelper::LegalizeResult
4786 LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
4787                                     LLT NarrowTy) {
4788   if (TypeIdx != 0)
4789     return UnableToLegalize;
4790 
4791   Register CondReg = MI.getOperand(1).getReg();
4792   LLT CondTy = MRI.getType(CondReg);
4793   if (CondTy.isVector()) // TODO: Handle vselect
4794     return UnableToLegalize;
4795 
4796   Register DstReg = MI.getOperand(0).getReg();
4797   LLT DstTy = MRI.getType(DstReg);
4798 
4799   SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
4800   SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
4801   SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
4802   LLT LeftoverTy;
4803   if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
4804                     Src1Regs, Src1LeftoverRegs))
4805     return UnableToLegalize;
4806 
4807   LLT Unused;
4808   if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
4809                     Src2Regs, Src2LeftoverRegs))
4810     llvm_unreachable("inconsistent extractParts result");
4811 
4812   for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
4813     auto Select = MIRBuilder.buildSelect(NarrowTy,
4814                                          CondReg, Src1Regs[I], Src2Regs[I]);
4815     DstRegs.push_back(Select.getReg(0));
4816   }
4817 
4818   for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
4819     auto Select = MIRBuilder.buildSelect(
4820       LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
4821     DstLeftoverRegs.push_back(Select.getReg(0));
4822   }
4823 
4824   insertParts(DstReg, DstTy, NarrowTy, DstRegs,
4825               LeftoverTy, DstLeftoverRegs);
4826 
4827   MI.eraseFromParent();
4828   return Legalized;
4829 }
4830 
4831 LegalizerHelper::LegalizeResult
4832 LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
4833                                   LLT NarrowTy) {
4834   if (TypeIdx != 1)
4835     return UnableToLegalize;
4836 
4837   Register DstReg = MI.getOperand(0).getReg();
4838   Register SrcReg = MI.getOperand(1).getReg();
4839   LLT DstTy = MRI.getType(DstReg);
4840   LLT SrcTy = MRI.getType(SrcReg);
4841   unsigned NarrowSize = NarrowTy.getSizeInBits();
4842 
4843   if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
4844     const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
4845 
4846     MachineIRBuilder &B = MIRBuilder;
4847     auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
4848     // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
4849     auto C_0 = B.buildConstant(NarrowTy, 0);
4850     auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
4851                                 UnmergeSrc.getReg(1), C_0);
4852     auto LoCTLZ = IsUndef ?
4853       B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
4854       B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
4855     auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
4856     auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
4857     auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
4858     B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
4859 
4860     MI.eraseFromParent();
4861     return Legalized;
4862   }
4863 
4864   return UnableToLegalize;
4865 }
4866 
4867 LegalizerHelper::LegalizeResult
4868 LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
4869                                   LLT NarrowTy) {
4870   if (TypeIdx != 1)
4871     return UnableToLegalize;
4872 
4873   Register DstReg = MI.getOperand(0).getReg();
4874   Register SrcReg = MI.getOperand(1).getReg();
4875   LLT DstTy = MRI.getType(DstReg);
4876   LLT SrcTy = MRI.getType(SrcReg);
4877   unsigned NarrowSize = NarrowTy.getSizeInBits();
4878 
4879   if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
4880     const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
4881 
4882     MachineIRBuilder &B = MIRBuilder;
4883     auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
4884     // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
4885     auto C_0 = B.buildConstant(NarrowTy, 0);
4886     auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
4887                                 UnmergeSrc.getReg(0), C_0);
4888     auto HiCTTZ = IsUndef ?
4889       B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
4890       B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
4891     auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
4892     auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
4893     auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
4894     B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
4895 
4896     MI.eraseFromParent();
4897     return Legalized;
4898   }
4899 
4900   return UnableToLegalize;
4901 }
4902 
4903 LegalizerHelper::LegalizeResult
4904 LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
4905                                    LLT NarrowTy) {
4906   if (TypeIdx != 1)
4907     return UnableToLegalize;
4908 
4909   Register DstReg = MI.getOperand(0).getReg();
4910   LLT DstTy = MRI.getType(DstReg);
4911   LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
4912   unsigned NarrowSize = NarrowTy.getSizeInBits();
4913 
4914   if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
4915     auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
4916 
4917     auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
4918     auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
4919     MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
4920 
4921     MI.eraseFromParent();
4922     return Legalized;
4923   }
4924 
4925   return UnableToLegalize;
4926 }
4927 
/// Lower the bit-counting opcodes (G_CTLZ, G_CTTZ, G_CTPOP and their
/// ZERO_UNDEF variants) into operations the target does support.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  const auto &TII = MIRBuilder.getTII();
  // Legal, Libcall and Custom all count as "the target can handle this".
  auto isSupported = [this](const LegalityQuery &Q) {
    auto QAction = LI.getAction(Q).Action;
    return QAction == Legal || QAction == Libcall || QAction == Custom;
  };
  switch (Opc) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // This trivially expands to CTLZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTLZ: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    LLT SrcTy = MRI.getType(SrcReg);
    unsigned Len = SrcTy.getSizeInBits();

    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
      // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
      auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
      auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
      auto ICmp = MIRBuilder.buildICmp(
          CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
      auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
      MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we do this:
    // NewLen = NextPowerOf2(Len);
    // x = x | (x >> 1);
    // x = x | (x >> 2);
    // ...
    // x = x | (x >>16);
    // x = x | (x >>32); // for 64-bit input
    // Upto NewLen/2
    // return Len - popcount(x);
    //
    // Ref: "Hacker's Delight" by Henry Warren
    Register Op = SrcReg;
    unsigned NewLen = PowerOf2Ceil(Len);
    // Smear the highest set bit into every lower position.
    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
      auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
      auto MIBOp = MIRBuilder.buildOr(
          SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
      Op = MIBOp.getReg(0);
    }
    // Leading zeros = Len - popcount(smeared value).
    auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
    MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
                        MIBPop);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    // This trivially expands to CTTZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTTZ: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    LLT SrcTy = MRI.getType(SrcReg);

    unsigned Len = SrcTy.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
      // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
      // zero.
      auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
      auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
      auto ICmp = MIRBuilder.buildICmp(
          CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
      auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
      MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we use: { return popcount(~x & (x - 1)); }
    // unless the target has ctlz but not ctpop, in which case we use:
    // { return 32 - nlz(~x & (x-1)); }
    // Ref: "Hacker's Delight" by Henry Warren
    auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
    auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
    // MIBTmp = ~x & (x - 1): a mask of the trailing zeros of x.
    auto MIBTmp = MIRBuilder.buildAnd(
        SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
    if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
        isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
      auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
      MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
                          MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
      MI.eraseFromParent();
      return Legalized;
    }
    // Reuse this instruction as a CTPOP of the trailing-zeros mask.
    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
    MI.getOperand(1).setReg(MIBTmp.getReg(0));
    return Legalized;
  }
  case TargetOpcode::G_CTPOP: {
    Register SrcReg = MI.getOperand(1).getReg();
    LLT Ty = MRI.getType(SrcReg);
    unsigned Size = Ty.getSizeInBits();
    MachineIRBuilder &B = MIRBuilder;

    // Count set bits in blocks of 2 bits. Default approach would be
    // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
    // We use following formula instead:
    // B2Count = val - { (val >> 1) & 0x55555555 }
    // since it gives same result in blocks of 2 with one instruction less.
    auto C_1 = B.buildConstant(Ty, 1);
    auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
    APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
    auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
    auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
    auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);

    // In order to get count in blocks of 4 add values from adjacent block of 2.
    // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
    auto C_2 = B.buildConstant(Ty, 2);
    auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
    APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
    auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
    auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
    auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
    auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);

    // For count in blocks of 8 bits we don't have to mask high 4 bits before
    // addition since count value sits in range {0,...,8} and 4 bits are enough
    // to hold such binary values. After addition high 4 bits still hold count
    // of set bits in high 4 bit block, set them to zero and get 8 bit result.
    // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
    auto C_4 = B.buildConstant(Ty, 4);
    auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
    auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
    APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
    auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
    auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);

    assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
    // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
    // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
    auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
    auto ResTmp = B.buildMul(Ty, B8Count, MulMask);

    // Shift count result from 8 high bits to low bits.
    auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
    B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);

    MI.eraseFromParent();
    return Legalized;
  }
  }
}
5090 
// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
// representation.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);

  // unsigned cul2f(ulong u) {
  //   uint lz = clz(u);
  //   uint e = (u != 0) ? 127U + 63U - lz : 0;
  //   u = (u << lz) & 0x7fffffffffffffffUL;
  //   ulong t = u & 0xffffffffffUL;
  //   uint v = (e << 23) | (uint)(u >> 40);
  //   uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
  //   return as_float(v + r);
  // }

  auto Zero32 = MIRBuilder.buildConstant(S32, 0);
  auto Zero64 = MIRBuilder.buildConstant(S64, 0);

  // Leading-zero count of the input; the zero input is handled separately
  // by the (u != 0) select below, so the zero-undef form is safe.
  auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);

  // Biased exponent: 127 (IEEE f32 bias) + 63 (MSB position) - lz.
  auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
  auto Sub = MIRBuilder.buildSub(S32, K, LZ);

  auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
  auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);

  // Normalize the significand and drop the now-implicit leading 1 bit.
  auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
  auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);

  auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);

  // T holds the low 40 bits that will be rounded away.
  auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
  auto T = MIRBuilder.buildAnd(S64, U, Mask1);

  // Assemble the exponent and the top 23 mantissa bits into V.
  auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
  auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
  auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));

  // Round to nearest, ties to even: R is the rounding increment (0 or 1).
  auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
  auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
  auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
  auto One = MIRBuilder.buildConstant(S32, 1);

  // On a tie (T == C), round to even: increment only if V's low bit is set.
  auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
  auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
  auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
  MIRBuilder.buildAdd(Dst, V, R);

  MI.eraseFromParent();
  return Legalized;
}
5149 
5150 LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
5151   Register Dst = MI.getOperand(0).getReg();
5152   Register Src = MI.getOperand(1).getReg();
5153   LLT DstTy = MRI.getType(Dst);
5154   LLT SrcTy = MRI.getType(Src);
5155 
5156   if (SrcTy == LLT::scalar(1)) {
5157     auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
5158     auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
5159     MIRBuilder.buildSelect(Dst, Src, True, False);
5160     MI.eraseFromParent();
5161     return Legalized;
5162   }
5163 
5164   if (SrcTy != LLT::scalar(64))
5165     return UnableToLegalize;
5166 
5167   if (DstTy == LLT::scalar(32)) {
5168     // TODO: SelectionDAG has several alternative expansions to port which may
5169     // be more reasonble depending on the available instructions. If a target
5170     // has sitofp, does not have CTLZ, or can efficiently use f64 as an
5171     // intermediate type, this is probably worse.
5172     return lowerU64ToF32BitOps(MI);
5173   }
5174 
5175   return UnableToLegalize;
5176 }
5177 
5178 LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
5179   Register Dst = MI.getOperand(0).getReg();
5180   Register Src = MI.getOperand(1).getReg();
5181   LLT DstTy = MRI.getType(Dst);
5182   LLT SrcTy = MRI.getType(Src);
5183 
5184   const LLT S64 = LLT::scalar(64);
5185   const LLT S32 = LLT::scalar(32);
5186   const LLT S1 = LLT::scalar(1);
5187 
5188   if (SrcTy == S1) {
5189     auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
5190     auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
5191     MIRBuilder.buildSelect(Dst, Src, True, False);
5192     MI.eraseFromParent();
5193     return Legalized;
5194   }
5195 
5196   if (SrcTy != S64)
5197     return UnableToLegalize;
5198 
5199   if (DstTy == S32) {
5200     // signed cl2f(long l) {
5201     //   long s = l >> 63;
5202     //   float r = cul2f((l + s) ^ s);
5203     //   return s ? -r : r;
5204     // }
5205     Register L = Src;
5206     auto SignBit = MIRBuilder.buildConstant(S64, 63);
5207     auto S = MIRBuilder.buildAShr(S64, L, SignBit);
5208 
5209     auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
5210     auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
5211     auto R = MIRBuilder.buildUITOFP(S32, Xor);
5212 
5213     auto RNeg = MIRBuilder.buildFNeg(S32, R);
5214     auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
5215                                             MIRBuilder.buildConstant(S64, 0));
5216     MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
5217     MI.eraseFromParent();
5218     return Legalized;
5219   }
5220 
5221   return UnableToLegalize;
5222 }
5223 
5224 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
5225   Register Dst = MI.getOperand(0).getReg();
5226   Register Src = MI.getOperand(1).getReg();
5227   LLT DstTy = MRI.getType(Dst);
5228   LLT SrcTy = MRI.getType(Src);
5229   const LLT S64 = LLT::scalar(64);
5230   const LLT S32 = LLT::scalar(32);
5231 
5232   if (SrcTy != S64 && SrcTy != S32)
5233     return UnableToLegalize;
5234   if (DstTy != S32 && DstTy != S64)
5235     return UnableToLegalize;
5236 
5237   // FPTOSI gives same result as FPTOUI for positive signed integers.
5238   // FPTOUI needs to deal with fp values that convert to unsigned integers
5239   // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.
5240 
5241   APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
5242   APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
5243                                                 : APFloat::IEEEdouble(),
5244                     APInt::getNullValue(SrcTy.getSizeInBits()));
5245   TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
5246 
5247   MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
5248 
5249   MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
5250   // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
5251   // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
5252   MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
5253   MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
5254   MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
5255   MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
5256 
5257   const LLT S1 = LLT::scalar(1);
5258 
5259   MachineInstrBuilder FCMP =
5260       MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
5261   MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
5262 
5263   MI.eraseFromParent();
5264   return Legalized;
5265 }
5266 
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
    return UnableToLegalize;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c

  unsigned SrcEltBits = SrcTy.getScalarSizeInBits();

  // Extract the biased exponent field (bits 23..30 of an IEEE f32).
  auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
  auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);

  auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
  auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);

  // Broadcast the sign bit across the destination width (0 or all-ones).
  auto SignMask = MIRBuilder.buildConstant(SrcTy,
                                           APInt::getSignMask(SrcEltBits));
  auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
  auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
  auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
  Sign = MIRBuilder.buildSExt(DstTy, Sign);

  // Recover the full significand by adding back the implicit leading 1
  // (0x00800000 is bit 23).
  auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
  auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
  auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);

  auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
  R = MIRBuilder.buildZExt(DstTy, R);

  // Unbias the exponent (the f32 bias is 127).
  auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
  auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
  auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
  auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);

  // Shift the significand left or right depending on whether the exponent
  // exceeds the significand width (23); only one of the two is selected.
  auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
  auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);

  const LLT S1 = LLT::scalar(1);
  auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
                                    S1, Exponent, ExponentLoBit);

  R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);

  // Apply the sign: (R ^ Sign) - Sign negates R when Sign is all-ones.
  auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
  auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);

  auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);

  // Inputs with a negative unbiased exponent have magnitude < 1 and
  // truncate to zero.
  auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
                                          S1, Exponent, ZeroSrcTy);

  auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
  MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);

  MI.eraseFromParent();
  return Legalized;
}
5333 
// f64 -> f16 conversion using round-to-nearest-even rounding mode.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();

  if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
    return UnableToLegalize;

  const unsigned ExpMask = 0x7ff;
  const unsigned ExpBiasf64 = 1023;
  const unsigned ExpBiasf16 = 15;
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  // Split the f64 bits into the low (U) and high (UH) 32-bit halves.
  auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
  Register U = Unmerge.getReg(0);
  Register UH = Unmerge.getReg(1);

  // E = the 11-bit f64 exponent field, rebased to the f16 bias below.
  auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
  E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));

  // Subtract the fp64 exponent bias (1023) to get the real exponent and
  // add the f16 bias (15) to get the biased exponent for the f16 format.
  E = MIRBuilder.buildAdd(
    S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));

  // M = top mantissa bits shifted into f16 position, with extra low bits
  // kept for rounding.
  auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
  M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));

  // Fold the 40 discarded low mantissa bits into a single sticky bit.
  auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
                                       MIRBuilder.buildConstant(S32, 0x1ff));
  MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);

  auto Zero = MIRBuilder.buildConstant(S32, 0);
  auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
  auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
  M = MIRBuilder.buildOr(S32, M, Lo40Set);

  // I = the f16 Inf/NaN encoding; NaN keeps a nonzero mantissa bit:
  // (M != 0 ? 0x0200 : 0) | 0x7c00;
  auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
  auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
  auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);

  auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
  auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);

  // N = the candidate normal-number encoding:
  // N = M | (E << 12);
  auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
  auto N = MIRBuilder.buildOr(S32, M, EShl12);

  // B = the denormalization shift amount:
  // B = clamp(1-E, 0, 13);
  auto One = MIRBuilder.buildConstant(S32, 1);
  auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
  auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
  B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));

  auto SigSetHigh = MIRBuilder.buildOr(S32, M,
                                       MIRBuilder.buildConstant(S32, 0x1000));

  // D = the denormal significand, shifted right by B; compare shifting back
  // to detect lost bits and OR them in as a sticky bit.
  auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
  auto D0 = MIRBuilder.buildShl(S32, D, B);

  auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
                                             D0, SigSetHigh);
  auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
  D = MIRBuilder.buildOr(S32, D, D1);

  // Pick the denormal encoding when the rebased exponent is below 1.
  auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
  auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);

  // Round to nearest, ties to even, using the two extra low bits of V.
  auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
  V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));

  auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
                                       MIRBuilder.buildConstant(S32, 3));
  auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);

  auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
                                       MIRBuilder.buildConstant(S32, 5));
  auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);

  V1 = MIRBuilder.buildOr(S32, V0, V1);
  V = MIRBuilder.buildAdd(S32, V, V1);

  // Overflow to infinity: rebased exponents above 30 cannot be represented
  // as a finite f16.
  auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,  S1,
                                       E, MIRBuilder.buildConstant(S32, 30));
  V = MIRBuilder.buildSelect(S32, CmpEGt30,
                             MIRBuilder.buildConstant(S32, 0x7c00), V);

  // E == 1039 corresponds to an all-ones f64 exponent (2047 + 15 - 1023),
  // i.e. the input was Inf or NaN; substitute the Inf/NaN encoding I.
  auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
                                         E, MIRBuilder.buildConstant(S32, 1039));
  V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);

  // Extract the sign bit.
  auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
  Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));

  // Insert the sign bit
  V = MIRBuilder.buildOr(S32, Sign, V);

  MIRBuilder.buildTrunc(Dst, V);
  MI.eraseFromParent();
  return Legalized;
}
5439 
5440 LegalizerHelper::LegalizeResult
5441 LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
5442   Register Dst = MI.getOperand(0).getReg();
5443   Register Src = MI.getOperand(1).getReg();
5444 
5445   LLT DstTy = MRI.getType(Dst);
5446   LLT SrcTy = MRI.getType(Src);
5447   const LLT S64 = LLT::scalar(64);
5448   const LLT S16 = LLT::scalar(16);
5449 
5450   if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
5451     return lowerFPTRUNC_F64_TO_F16(MI);
5452 
5453   return UnableToLegalize;
5454 }
5455 
5456 // TODO: If RHS is a constant SelectionDAGBuilder expands this into a
5457 // multiplication tree.
5458 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
5459   Register Dst = MI.getOperand(0).getReg();
5460   Register Src0 = MI.getOperand(1).getReg();
5461   Register Src1 = MI.getOperand(2).getReg();
5462   LLT Ty = MRI.getType(Dst);
5463 
5464   auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
5465   MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
5466   MI.eraseFromParent();
5467   return Legalized;
5468 }
5469 
5470 static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
5471   switch (Opc) {
5472   case TargetOpcode::G_SMIN:
5473     return CmpInst::ICMP_SLT;
5474   case TargetOpcode::G_SMAX:
5475     return CmpInst::ICMP_SGT;
5476   case TargetOpcode::G_UMIN:
5477     return CmpInst::ICMP_ULT;
5478   case TargetOpcode::G_UMAX:
5479     return CmpInst::ICMP_UGT;
5480   default:
5481     llvm_unreachable("not in integer min/max");
5482   }
5483 }
5484 
5485 LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
5486   Register Dst = MI.getOperand(0).getReg();
5487   Register Src0 = MI.getOperand(1).getReg();
5488   Register Src1 = MI.getOperand(2).getReg();
5489 
5490   const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
5491   LLT CmpType = MRI.getType(Dst).changeElementSize(1);
5492 
5493   auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
5494   MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
5495 
5496   MI.eraseFromParent();
5497   return Legalized;
5498 }
5499 
5500 LegalizerHelper::LegalizeResult
5501 LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
5502   Register Dst = MI.getOperand(0).getReg();
5503   Register Src0 = MI.getOperand(1).getReg();
5504   Register Src1 = MI.getOperand(2).getReg();
5505 
5506   const LLT Src0Ty = MRI.getType(Src0);
5507   const LLT Src1Ty = MRI.getType(Src1);
5508 
5509   const int Src0Size = Src0Ty.getScalarSizeInBits();
5510   const int Src1Size = Src1Ty.getScalarSizeInBits();
5511 
5512   auto SignBitMask = MIRBuilder.buildConstant(
5513     Src0Ty, APInt::getSignMask(Src0Size));
5514 
5515   auto NotSignBitMask = MIRBuilder.buildConstant(
5516     Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
5517 
5518   Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
5519   Register And1;
5520   if (Src0Ty == Src1Ty) {
5521     And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
5522   } else if (Src0Size > Src1Size) {
5523     auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
5524     auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
5525     auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
5526     And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
5527   } else {
5528     auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
5529     auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
5530     auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
5531     And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
5532   }
5533 
5534   // Be careful about setting nsz/nnan/ninf on every instruction, since the
5535   // constants are a nan and -0.0, but the final result should preserve
5536   // everything.
5537   unsigned Flags = MI.getFlags();
5538   MIRBuilder.buildOr(Dst, And0, And1, Flags);
5539 
5540   MI.eraseFromParent();
5541   return Legalized;
5542 }
5543 
5544 LegalizerHelper::LegalizeResult
5545 LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
5546   unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
5547     TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
5548 
5549   Register Dst = MI.getOperand(0).getReg();
5550   Register Src0 = MI.getOperand(1).getReg();
5551   Register Src1 = MI.getOperand(2).getReg();
5552   LLT Ty = MRI.getType(Dst);
5553 
5554   if (!MI.getFlag(MachineInstr::FmNoNans)) {
5555     // Insert canonicalizes if it's possible we need to quiet to get correct
5556     // sNaN behavior.
5557 
5558     // Note this must be done here, and not as an optimization combine in the
5559     // absence of a dedicate quiet-snan instruction as we're using an
5560     // omni-purpose G_FCANONICALIZE.
5561     if (!isKnownNeverSNaN(Src0, MRI))
5562       Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
5563 
5564     if (!isKnownNeverSNaN(Src1, MRI))
5565       Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
5566   }
5567 
5568   // If there are no nans, it's safe to simply replace this with the non-IEEE
5569   // version.
5570   MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
5571   MI.eraseFromParent();
5572   return Legalized;
5573 }
5574 
5575 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
5576   // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
5577   Register DstReg = MI.getOperand(0).getReg();
5578   LLT Ty = MRI.getType(DstReg);
5579   unsigned Flags = MI.getFlags();
5580 
5581   auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
5582                                   Flags);
5583   MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
5584   MI.eraseFromParent();
5585   return Legalized;
5586 }
5587 
5588 LegalizerHelper::LegalizeResult
5589 LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
5590   Register DstReg = MI.getOperand(0).getReg();
5591   Register X = MI.getOperand(1).getReg();
5592   const unsigned Flags = MI.getFlags();
5593   const LLT Ty = MRI.getType(DstReg);
5594   const LLT CondTy = Ty.changeElementSize(1);
5595 
5596   // round(x) =>
5597   //  t = trunc(x);
5598   //  d = fabs(x - t);
5599   //  o = copysign(1.0f, x);
5600   //  return t + (d >= 0.5 ? o : 0.0);
5601 
5602   auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
5603 
5604   auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
5605   auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
5606   auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
5607   auto One = MIRBuilder.buildFConstant(Ty, 1.0);
5608   auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
5609   auto SignOne = MIRBuilder.buildFCopysign(Ty, One, X);
5610 
5611   auto Cmp = MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half,
5612                                   Flags);
5613   auto Sel = MIRBuilder.buildSelect(Ty, Cmp, SignOne, Zero, Flags);
5614 
5615   MIRBuilder.buildFAdd(DstReg, T, Sel, Flags);
5616 
5617   MI.eraseFromParent();
5618   return Legalized;
5619 }
5620 
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFFloor(MachineInstr &MI) {
  // Lower G_FFLOOR in terms of G_INTRINSIC_TRUNC.
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  unsigned Flags = MI.getFlags();
  LLT Ty = MRI.getType(DstReg);
  const LLT CondTy = Ty.changeElementSize(1);

  // result = trunc(src);
  // if (src < 0.0 && src != result)
  //   result += -1.0.

  auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
  auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);

  auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
                                  SrcReg, Zero, Flags);
  auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
                                      SrcReg, Trunc, Flags);
  // The combined i1 condition reads as -1 under a signed interpretation, so
  // SITOFP turns it into -1.0 when the adjustment is needed, 0.0 otherwise.
  auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
  auto AddVal = MIRBuilder.buildSITOFP(Ty, And);

  MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
  MI.eraseFromParent();
  return Legalized;
}
5647 
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
  // Lower G_MERGE_VALUES by zero-extending every part into one wide scalar,
  // shifting each to its bit offset and ORing them together. A pointer
  // result is produced with a final G_INTTOPTR.
  const unsigned NumOps = MI.getNumOperands();
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(Src0Reg);
  unsigned PartSize = SrcTy.getSizeInBits();

  // Part 0 occupies the low bits; it seeds the accumulator.
  LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
  Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);

  for (unsigned I = 2; I != NumOps; ++I) {
    const unsigned Offset = (I - 1) * PartSize;

    Register SrcReg = MI.getOperand(I).getReg();
    auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);

    // The last OR can define DstReg directly when no final cast is needed.
    Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
      MRI.createGenericVirtualRegister(WideTy);

    auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
    auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
    MIRBuilder.buildOr(NextResult, ResultReg, Shl);
    ResultReg = NextResult;
  }

  if (DstTy.isPointer()) {
    // Non-integral address spaces can't round-trip through integers.
    // NOTE(review): this bails out after the merge instructions above were
    // already emitted — presumably the caller/observer copes, but verify.
    if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
          DstTy.getAddressSpace())) {
      LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
      return UnableToLegalize;
    }

    MIRBuilder.buildIntToPtr(DstReg, ResultReg);
  }

  MI.eraseFromParent();
  return Legalized;
}
5688 
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
  // Lower G_UNMERGE_VALUES by viewing the source as one wide integer and
  // producing each destination with a right shift plus truncate.
  const unsigned NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
  if (DstTy.isPointer())
    return UnableToLegalize; // TODO

  // Coerce the source into a single scalar integer; bail out if that is
  // not possible.
  SrcReg = coerceToScalar(SrcReg);
  if (!SrcReg)
    return UnableToLegalize;

  // Expand scalarizing unmerge as bitcast to integer and shift.
  LLT IntTy = MRI.getType(SrcReg);

  // Destination 0 is the low bits: no shift needed.
  MIRBuilder.buildTrunc(Dst0Reg, SrcReg);

  const unsigned DstSize = DstTy.getSizeInBits();
  unsigned Offset = DstSize;
  for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
    auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
    auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
    MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
  }

  MI.eraseFromParent();
  return Legalized;
}
5718 
/// Lower a vector extract or insert by writing the vector to a stack temporary
/// and reloading the element or vector.
///
/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
///  =>
///  %stack_temp = G_FRAME_INDEX
///  G_STORE %vec, %stack_temp
///  %idx = clamp(%idx, %vec.getNumElements())
///  %element_ptr = G_PTR_ADD %stack_temp, %idx
///  %dst = G_LOAD %element_ptr
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register InsertVal;
  // For G_INSERT_VECTOR_ELT operand 2 is the value to insert; for
  // G_EXTRACT_VECTOR_ELT InsertVal stays invalid and selects the load path.
  if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
    InsertVal = MI.getOperand(2).getReg();

  // The index is the last operand for both opcodes.
  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();

  LLT VecTy = MRI.getType(SrcVec);
  LLT EltTy = VecTy.getElementType();
  if (!EltTy.isByteSized()) { // Not implemented.
    LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
    return UnableToLegalize;
  }

  unsigned EltBytes = EltTy.getSizeInBytes();
  Align VecAlign = getStackTemporaryAlignment(VecTy);
  Align EltAlign;

  // Spill the whole vector to a stack slot.
  MachinePointerInfo PtrInfo;
  auto StackTemp = createStackTemporary(TypeSize::Fixed(VecTy.getSizeInBytes()),
                                        VecAlign, PtrInfo);
  MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);

  // Get the pointer to the element, and be sure not to hit undefined behavior
  // if the index is out of bounds.
  Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);

  int64_t IdxVal;
  if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
    // Constant index: the element offset and alignment are known exactly.
    int64_t Offset = IdxVal * EltBytes;
    PtrInfo = PtrInfo.getWithOffset(Offset);
    EltAlign = commonAlignment(VecAlign, Offset);
  } else {
    // We lose information with a variable offset.
    EltAlign = getStackTemporaryAlignment(EltTy);
    PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
  }

  if (InsertVal) {
    // Write the inserted element
    MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);

    // Reload the whole vector.
    // NOTE(review): this reload reuses the element's PtrInfo rather than the
    // slot's base info — presumably benign for alias analysis, but confirm
    // the offset in the MachinePointerInfo is intended here.
    MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
  } else {
    MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
  }

  MI.eraseFromParent();
  return Legalized;
}
5783 
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
  // Lower G_SHUFFLE_VECTOR by extracting each selected source element and
  // rebuilding the result with G_BUILD_VECTOR (or a plain copy for the
  // scalar-result case).
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(1).getReg();
  Register Src1Reg = MI.getOperand(2).getReg();
  LLT Src0Ty = MRI.getType(Src0Reg);
  LLT DstTy = MRI.getType(DstReg);
  LLT IdxTy = LLT::scalar(32);

  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();

  if (DstTy.isScalar()) {
    if (Src0Ty.isVector())
      return UnableToLegalize;

    // This is just a SELECT.
    assert(Mask.size() == 1 && "Expected a single mask element");
    Register Val;
    // Negative (undef) or out-of-range mask values produce undef.
    if (Mask[0] < 0 || Mask[0] > 1)
      Val = MIRBuilder.buildUndef(DstTy).getReg(0);
    else
      Val = Mask[0] == 0 ? Src0Reg : Src1Reg;
    MIRBuilder.buildCopy(DstReg, Val);
    MI.eraseFromParent();
    return Legalized;
  }

  Register Undef;
  SmallVector<Register, 32> BuildVec;
  LLT EltTy = DstTy.getElementType();

  for (int Idx : Mask) {
    // Negative mask elements mean undef; materialize one undef value and
    // reuse it for all such lanes.
    if (Idx < 0) {
      if (!Undef.isValid())
        Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
      BuildVec.push_back(Undef);
      continue;
    }

    if (Src0Ty.isScalar()) {
      // Scalar sources: index 0 selects Src0, anything else Src1.
      BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
    } else {
      // Mask indices >= NumElts address the second source vector.
      int NumElts = Src0Ty.getNumElements();
      Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
      int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
      auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
      auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
      BuildVec.push_back(Extract.getReg(0));
    }
  }

  MIRBuilder.buildBuildVector(DstReg, BuildVec);
  MI.eraseFromParent();
  return Legalized;
}
5839 
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
  // Lower G_DYN_STACKALLOC by adjusting the stack pointer directly:
  //   SP = align-down(SP - AllocSize); Dst = SP
  // Only downward-growing stacks are supported here.
  const auto &MF = *MI.getMF();
  const auto &TFI = *MF.getSubtarget().getFrameLowering();
  if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
    return UnableToLegalize;

  Register Dst = MI.getOperand(0).getReg();
  Register AllocSize = MI.getOperand(1).getReg();
  Align Alignment = assumeAligned(MI.getOperand(2).getImm());

  LLT PtrTy = MRI.getType(Dst);
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  // Read the current SP and move it into the integer domain for arithmetic.
  Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
  auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
  SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);

  // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
  // have to generate an extra instruction to negate the alloc and then use
  // G_PTR_ADD to add the negative offset.
  auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
  if (Alignment > Align(1)) {
    // Round down to the requested alignment by masking with -Alignment.
    APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
    AlignMask.negate();
    auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
    Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
  }

  // Commit the new SP and return it as the allocated block.
  SPTmp = MIRBuilder.buildCast(PtrTy, Alloc);
  MIRBuilder.buildCopy(SPReg, SPTmp);
  MIRBuilder.buildCopy(Dst, SPTmp);

  MI.eraseFromParent();
  return Legalized;
}
5876 
5877 LegalizerHelper::LegalizeResult
5878 LegalizerHelper::lowerExtract(MachineInstr &MI) {
5879   Register Dst = MI.getOperand(0).getReg();
5880   Register Src = MI.getOperand(1).getReg();
5881   unsigned Offset = MI.getOperand(2).getImm();
5882 
5883   LLT DstTy = MRI.getType(Dst);
5884   LLT SrcTy = MRI.getType(Src);
5885 
5886   if (DstTy.isScalar() &&
5887       (SrcTy.isScalar() ||
5888        (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
5889     LLT SrcIntTy = SrcTy;
5890     if (!SrcTy.isScalar()) {
5891       SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
5892       Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0);
5893     }
5894 
5895     if (Offset == 0)
5896       MIRBuilder.buildTrunc(Dst, Src);
5897     else {
5898       auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
5899       auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt);
5900       MIRBuilder.buildTrunc(Dst, Shr);
5901     }
5902 
5903     MI.eraseFromParent();
5904     return Legalized;
5905   }
5906 
5907   return UnableToLegalize;
5908 }
5909 
LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
  // Lower G_INSERT as integer bit manipulation: clear the destination bit
  // range in Src with a mask, then OR in the (shifted) inserted value.
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register InsertSrc = MI.getOperand(2).getReg();
  uint64_t Offset = MI.getOperand(3).getImm();

  // The result and first source of G_INSERT share a type, so typing off Src
  // here is interchangeable with typing off Dst.
  LLT DstTy = MRI.getType(Src);
  LLT InsertTy = MRI.getType(InsertSrc);

  // Inserting a vector, or inserting a non-element type into a vector, is
  // not supported by this expansion.
  if (InsertTy.isVector() ||
      (DstTy.isVector() && DstTy.getElementType() != InsertTy))
    return UnableToLegalize;

  // Pointers in non-integral address spaces can't round-trip through
  // integers.
  const DataLayout &DL = MIRBuilder.getDataLayout();
  if ((DstTy.isPointer() &&
       DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
      (InsertTy.isPointer() &&
       DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
    LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
    return UnableToLegalize;
  }

  LLT IntDstTy = DstTy;

  // Move both values into plain scalar integers for the bit twiddling.
  if (!DstTy.isScalar()) {
    IntDstTy = LLT::scalar(DstTy.getSizeInBits());
    Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
  }

  if (!InsertTy.isScalar()) {
    const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
    InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
  }

  // Widen the inserted value and shift it up to its bit offset.
  Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
  if (Offset != 0) {
    auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
    ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
  }

  // The wrapped bit range covers everything *outside* [Offset, Offset +
  // insert size), so the AND below clears only the field being replaced.
  APInt MaskVal = APInt::getBitsSetWithWrap(
      DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);

  auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
  auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
  auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);

  // Cast the integer result back to the original (possibly pointer) type.
  MIRBuilder.buildCast(Dst, Or);
  MI.eraseFromParent();
  return Legalized;
}
5961 
5962 LegalizerHelper::LegalizeResult
5963 LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
5964   Register Dst0 = MI.getOperand(0).getReg();
5965   Register Dst1 = MI.getOperand(1).getReg();
5966   Register LHS = MI.getOperand(2).getReg();
5967   Register RHS = MI.getOperand(3).getReg();
5968   const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
5969 
5970   LLT Ty = MRI.getType(Dst0);
5971   LLT BoolTy = MRI.getType(Dst1);
5972 
5973   if (IsAdd)
5974     MIRBuilder.buildAdd(Dst0, LHS, RHS);
5975   else
5976     MIRBuilder.buildSub(Dst0, LHS, RHS);
5977 
5978   // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
5979 
5980   auto Zero = MIRBuilder.buildConstant(Ty, 0);
5981 
5982   // For an addition, the result should be less than one of the operands (LHS)
5983   // if and only if the other operand (RHS) is negative, otherwise there will
5984   // be overflow.
5985   // For a subtraction, the result should be less than one of the operands
5986   // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
5987   // otherwise there will be overflow.
5988   auto ResultLowerThanLHS =
5989       MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
5990   auto ConditionRHS = MIRBuilder.buildICmp(
5991       IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
5992 
5993   MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
5994   MI.eraseFromParent();
5995   return Legalized;
5996 }
5997 
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
  // Lower G_{U,S}{ADD,SUB}SAT using min/max: clamp the RHS into the range
  // that cannot overflow the add/sub, then emit the plain operation.
  Register Res = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Res);
  bool IsSigned;
  bool IsAdd;
  unsigned BaseOp;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected addsat/subsat opcode");
  case TargetOpcode::G_UADDSAT:
    IsSigned = false;
    IsAdd = true;
    BaseOp = TargetOpcode::G_ADD;
    break;
  case TargetOpcode::G_SADDSAT:
    IsSigned = true;
    IsAdd = true;
    BaseOp = TargetOpcode::G_ADD;
    break;
  case TargetOpcode::G_USUBSAT:
    IsSigned = false;
    IsAdd = false;
    BaseOp = TargetOpcode::G_SUB;
    break;
  case TargetOpcode::G_SSUBSAT:
    IsSigned = true;
    IsAdd = false;
    BaseOp = TargetOpcode::G_SUB;
    break;
  }

  if (IsSigned) {
    // sadd.sat(a, b) ->
    //   hi = 0x7fffffff - smax(a, 0)
    //   lo = 0x80000000 - smin(a, 0)
    //   a + smin(smax(lo, b), hi)
    // ssub.sat(a, b) ->
    //   lo = smax(a, -1) - 0x7fffffff
    //   hi = smin(a, -1) - 0x80000000
    //   a - smin(smax(lo, b), hi)
    // TODO: AMDGPU can use a "median of 3" instruction here:
    //   a +/- med3(lo, b, hi)
    uint64_t NumBits = Ty.getScalarSizeInBits();
    auto MaxVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
    auto MinVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
    MachineInstrBuilder Hi, Lo;
    if (IsAdd) {
      auto Zero = MIRBuilder.buildConstant(Ty, 0);
      Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
      Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
    } else {
      auto NegOne = MIRBuilder.buildConstant(Ty, -1);
      Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
                               MaxVal);
      Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
                               MinVal);
    }
    // Clamp RHS into [Lo, Hi] so LHS +/- RHS cannot leave the signed range.
    auto RHSClamped =
        MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
    MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
  } else {
    // uadd.sat(a, b) -> a + umin(~a, b)
    // usub.sat(a, b) -> a - umin(a, b)
    Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
    auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
    MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
  }

  MI.eraseFromParent();
  return Legalized;
}
6074 
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
  // Lower G_{U,S}{ADD,SUB}SAT via the overflow-producing G_*O instructions:
  // do the operation, and select the saturated value when the overflow bit
  // is set.
  Register Res = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Res);
  LLT BoolTy = Ty.changeElementSize(1);
  bool IsSigned;
  bool IsAdd;
  unsigned OverflowOp;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected addsat/subsat opcode");
  case TargetOpcode::G_UADDSAT:
    IsSigned = false;
    IsAdd = true;
    OverflowOp = TargetOpcode::G_UADDO;
    break;
  case TargetOpcode::G_SADDSAT:
    IsSigned = true;
    IsAdd = true;
    OverflowOp = TargetOpcode::G_SADDO;
    break;
  case TargetOpcode::G_USUBSAT:
    IsSigned = false;
    IsAdd = false;
    OverflowOp = TargetOpcode::G_USUBO;
    break;
  case TargetOpcode::G_SSUBSAT:
    IsSigned = true;
    IsAdd = false;
    OverflowOp = TargetOpcode::G_SSUBO;
    break;
  }

  auto OverflowRes =
      MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
  Register Tmp = OverflowRes.getReg(0);
  Register Ov = OverflowRes.getReg(1);
  MachineInstrBuilder Clamp;
  if (IsSigned) {
    // sadd.sat(a, b) ->
    //   {tmp, ov} = saddo(a, b)
    //   ov ? (tmp >>s 31) + 0x80000000 : r
    // ssub.sat(a, b) ->
    //   {tmp, ov} = ssubo(a, b)
    //   ov ? (tmp >>s 31) + 0x80000000 : r
    // The arithmetic shift of the wrapped result gives all-zeros or all-ones
    // from its sign bit; adding SIGNED_MIN turns that into SIGNED_MAX or
    // SIGNED_MIN, i.e. saturation in the direction of the overflow.
    uint64_t NumBits = Ty.getScalarSizeInBits();
    auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
    auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
    auto MinVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
    Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
  } else {
    // uadd.sat(a, b) ->
    //   {tmp, ov} = uaddo(a, b)
    //   ov ? 0xffffffff : tmp
    // usub.sat(a, b) ->
    //   {tmp, ov} = usubo(a, b)
    //   ov ? 0 : tmp
    Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
  }
  MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);

  MI.eraseFromParent();
  return Legalized;
}
6142 
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShlSat(MachineInstr &MI) {
  // Lower G_SSHLSAT/G_USHLSAT: do the plain shift, undo it with the opposite
  // shift, and saturate if any bits were lost in the round trip.
  assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
          MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
         "Expected shlsat opcode!");
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
  Register Res = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Res);
  LLT BoolTy = Ty.changeElementSize(1);

  unsigned BW = Ty.getScalarSizeInBits();
  auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
  // Shift back (arithmetic for signed): if no overflow occurred this
  // reproduces LHS exactly.
  auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
                       : MIRBuilder.buildLShr(Ty, Result, RHS);

  MachineInstrBuilder SatVal;
  if (IsSigned) {
    // Signed saturation: negative LHS saturates to SIGNED_MIN, non-negative
    // to SIGNED_MAX.
    auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
    auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
    auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
                                    MIRBuilder.buildConstant(Ty, 0));
    SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
  } else {
    // Unsigned saturation: all-ones.
    SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
  }
  // Overflow happened iff the round-tripped value differs from LHS.
  auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
  MIRBuilder.buildSelect(Res, Ov, SatVal, Result);

  MI.eraseFromParent();
  return Legalized;
}
6176 
6177 LegalizerHelper::LegalizeResult
6178 LegalizerHelper::lowerBswap(MachineInstr &MI) {
6179   Register Dst = MI.getOperand(0).getReg();
6180   Register Src = MI.getOperand(1).getReg();
6181   const LLT Ty = MRI.getType(Src);
6182   unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
6183   unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
6184 
6185   // Swap most and least significant byte, set remaining bytes in Res to zero.
6186   auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
6187   auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
6188   auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
6189   auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
6190 
6191   // Set i-th high/low byte in Res to i-th low/high byte from Src.
6192   for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
6193     // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
6194     APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
6195     auto Mask = MIRBuilder.buildConstant(Ty, APMask);
6196     auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
6197     // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
6198     auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
6199     auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
6200     Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
6201     // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
6202     auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
6203     auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
6204     Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
6205   }
6206   Res.getInstr()->getOperand(0).setReg(Dst);
6207 
6208   MI.eraseFromParent();
6209   return Legalized;
6210 }
6211 
6212 //{ (Src & Mask) >> N } | { (Src << N) & Mask }
6213 static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
6214                                  MachineInstrBuilder Src, APInt Mask) {
6215   const LLT Ty = Dst.getLLTTy(*B.getMRI());
6216   MachineInstrBuilder C_N = B.buildConstant(Ty, N);
6217   MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
6218   auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
6219   auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
6220   return B.buildOr(Dst, LHS, RHS);
6221 }
6222 
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
  // Lower G_BITREVERSE as G_BSWAP followed by three mask-and-shift passes
  // that reverse the bits within each byte (nibbles, then bit pairs, then
  // single bits).
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  const LLT Ty = MRI.getType(Src);
  unsigned Size = Ty.getSizeInBits();

  // Reverse the byte order first; the per-byte swaps below then apply
  // uniformly to every byte.
  MachineInstrBuilder BSWAP =
      MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});

  // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
  //    [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
  // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
  MachineInstrBuilder Swap4 =
      SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));

  // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
  //    [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
  // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
  MachineInstrBuilder Swap2 =
      SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));

  // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
  //    [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
  // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
  SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));

  MI.eraseFromParent();
  return Legalized;
}
6253 
6254 LegalizerHelper::LegalizeResult
6255 LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
6256   MachineFunction &MF = MIRBuilder.getMF();
6257 
6258   bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
6259   int NameOpIdx = IsRead ? 1 : 0;
6260   int ValRegIndex = IsRead ? 0 : 1;
6261 
6262   Register ValReg = MI.getOperand(ValRegIndex).getReg();
6263   const LLT Ty = MRI.getType(ValReg);
6264   const MDString *RegStr = cast<MDString>(
6265     cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
6266 
6267   Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
6268   if (!PhysReg.isValid())
6269     return UnableToLegalize;
6270 
6271   if (IsRead)
6272     MIRBuilder.buildCopy(ValReg, PhysReg);
6273   else
6274     MIRBuilder.buildCopy(PhysReg, ValReg);
6275 
6276   MI.eraseFromParent();
6277   return Legalized;
6278 }
6279 
6280 LegalizerHelper::LegalizeResult
6281 LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
6282   bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
6283   unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
6284   Register Result = MI.getOperand(0).getReg();
6285   LLT OrigTy = MRI.getType(Result);
6286   auto SizeInBits = OrigTy.getScalarSizeInBits();
6287   LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
6288 
6289   auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
6290   auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
6291   auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
6292   unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
6293 
6294   auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
6295   auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
6296   MIRBuilder.buildTrunc(Result, Shifted);
6297 
6298   MI.eraseFromParent();
6299   return Legalized;
6300 }
6301 
LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
  // Implement vector G_SELECT in terms of XOR, AND, OR.
  Register DstReg = MI.getOperand(0).getReg();
  Register MaskReg = MI.getOperand(1).getReg();
  Register Op1Reg = MI.getOperand(2).getReg();
  Register Op2Reg = MI.getOperand(3).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT MaskTy = MRI.getType(MaskReg);
  LLT Op1Ty = MRI.getType(Op1Reg);
  // Only the vector form is lowered here.
  if (!DstTy.isVector())
    return UnableToLegalize;

  // Vector selects can have a scalar predicate. If so, splat into a vector and
  // finish for later legalization attempts to try again.
  if (MaskTy.isScalar()) {
    Register MaskElt = MaskReg;
    // Sign-extend so a true scalar predicate becomes an all-ones element.
    if (MaskTy.getSizeInBits() < DstTy.getScalarSizeInBits())
      MaskElt = MIRBuilder.buildSExt(DstTy.getElementType(), MaskElt).getReg(0);
    // Generate a vector splat idiom to be pattern matched later.
    auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
    Observer.changingInstr(MI);
    MI.getOperand(1).setReg(ShufSplat.getReg(0));
    Observer.changedInstr(MI);
    // The rewritten G_SELECT now has a vector mask and will be revisited.
    return Legalized;
  }

  // The bitwise expansion below needs the mask to cover the operands exactly.
  if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits()) {
    return UnableToLegalize;
  }

  // dst = (op1 & mask) | (op2 & ~mask) — presumably the mask elements are
  // zero/all-ones here (target boolean contents); verify for targets with
  // other boolean representations.
  auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
  auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
  auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
  MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
  MI.eraseFromParent();
  return Legalized;
}
6339