1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file implements the LegalizerHelper class to legalize
10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
11 /// primary legalization.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
16 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
17 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
18 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/CodeGen/TargetInstrInfo.h"
21 #include "llvm/CodeGen/TargetLowering.h"
22 #include "llvm/CodeGen/TargetSubtargetInfo.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
26 
27 #define DEBUG_TYPE "legalizer"
28 
29 using namespace llvm;
30 using namespace LegalizeActions;
31 
32 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
33 ///
34 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
35 /// with any leftover piece as type \p LeftoverTy
36 ///
37 /// Returns -1 in the first element of the pair if the breakdown is not
38 /// satisfiable.
39 static std::pair<int, int>
40 getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
41   assert(!LeftoverTy.isValid() && "this is an out argument");
42 
43   unsigned Size = OrigTy.getSizeInBits();
44   unsigned NarrowSize = NarrowTy.getSizeInBits();
45   unsigned NumParts = Size / NarrowSize;
46   unsigned LeftoverSize = Size - NumParts * NarrowSize;
47   assert(Size > NarrowSize);
48 
49   if (LeftoverSize == 0)
50     return {NumParts, 0};
51 
52   if (NarrowTy.isVector()) {
53     unsigned EltSize = OrigTy.getScalarSizeInBits();
54     if (LeftoverSize % EltSize != 0)
55       return {-1, -1};
56     LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
57   } else {
58     LeftoverTy = LLT::scalar(LeftoverSize);
59   }
60 
61   int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
62   return std::make_pair(NumParts, NumLeftover);
63 }
64 
65 LegalizerHelper::LegalizerHelper(MachineFunction &MF,
66                                  GISelChangeObserver &Observer,
67                                  MachineIRBuilder &Builder)
68     : MIRBuilder(Builder), MRI(MF.getRegInfo()),
69       LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
70   MIRBuilder.setMF(MF);
71   MIRBuilder.setChangeObserver(Observer);
72 }
73 
/// Main constructor: uses the explicitly provided LegalizerInfo \p LI rather
/// than querying it from \p MF's subtarget.
LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &B)
    : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
  // Bind the builder to this function and make sure every instruction it
  // creates/erases is reported to the change observer.
  MIRBuilder.setMF(MF);
  MIRBuilder.setChangeObserver(Observer);
}
81 LegalizerHelper::LegalizeResult
82 LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
83   LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));
84 
85   if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
86       MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
87     return LI.legalizeIntrinsic(MI, MRI, MIRBuilder) ? Legalized
88                                                      : UnableToLegalize;
89   auto Step = LI.getAction(MI, MRI);
90   switch (Step.Action) {
91   case Legal:
92     LLVM_DEBUG(dbgs() << ".. Already legal\n");
93     return AlreadyLegal;
94   case Libcall:
95     LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
96     return libcall(MI);
97   case NarrowScalar:
98     LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
99     return narrowScalar(MI, Step.TypeIdx, Step.NewType);
100   case WidenScalar:
101     LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
102     return widenScalar(MI, Step.TypeIdx, Step.NewType);
103   case Lower:
104     LLVM_DEBUG(dbgs() << ".. Lower\n");
105     return lower(MI, Step.TypeIdx, Step.NewType);
106   case FewerElements:
107     LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
108     return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
109   case MoreElements:
110     LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
111     return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
112   case Custom:
113     LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
114     return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
115                                                             : UnableToLegalize;
116   default:
117     LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
118     return UnableToLegalize;
119   }
120 }
121 
122 void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
123                                    SmallVectorImpl<Register> &VRegs) {
124   for (int i = 0; i < NumParts; ++i)
125     VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
126   MIRBuilder.buildUnmerge(VRegs, Reg);
127 }
128 
129 bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
130                                    LLT MainTy, LLT &LeftoverTy,
131                                    SmallVectorImpl<Register> &VRegs,
132                                    SmallVectorImpl<Register> &LeftoverRegs) {
133   assert(!LeftoverTy.isValid() && "this is an out argument");
134 
135   unsigned RegSize = RegTy.getSizeInBits();
136   unsigned MainSize = MainTy.getSizeInBits();
137   unsigned NumParts = RegSize / MainSize;
138   unsigned LeftoverSize = RegSize - NumParts * MainSize;
139 
140   // Use an unmerge when possible.
141   if (LeftoverSize == 0) {
142     for (unsigned I = 0; I < NumParts; ++I)
143       VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
144     MIRBuilder.buildUnmerge(VRegs, Reg);
145     return true;
146   }
147 
148   if (MainTy.isVector()) {
149     unsigned EltSize = MainTy.getScalarSizeInBits();
150     if (LeftoverSize % EltSize != 0)
151       return false;
152     LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
153   } else {
154     LeftoverTy = LLT::scalar(LeftoverSize);
155   }
156 
157   // For irregular sizes, extract the individual parts.
158   for (unsigned I = 0; I != NumParts; ++I) {
159     Register NewReg = MRI.createGenericVirtualRegister(MainTy);
160     VRegs.push_back(NewReg);
161     MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
162   }
163 
164   for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
165        Offset += LeftoverSize) {
166     Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
167     LeftoverRegs.push_back(NewReg);
168     MIRBuilder.buildExtract(NewReg, Reg, Offset);
169   }
170 
171   return true;
172 }
173 
/// Recombine \p PartRegs (each of type \p PartTy) plus \p LeftoverRegs (each
/// of type \p LeftoverTy) into a single value of type \p ResultTy defined in
/// \p DstReg. This is the inverse of the leftover-aware extractParts.
void LegalizerHelper::insertParts(Register DstReg,
                                  LLT ResultTy, LLT PartTy,
                                  ArrayRef<Register> PartRegs,
                                  LLT LeftoverTy,
                                  ArrayRef<Register> LeftoverRegs) {
  // An invalid LeftoverTy means the parts tile ResultTy exactly, so a single
  // merge-style instruction reconstructs the value.
  if (!LeftoverTy.isValid()) {
    assert(LeftoverRegs.empty());

    if (!ResultTy.isVector()) {
      MIRBuilder.buildMerge(DstReg, PartRegs);
      return;
    }

    // Vector result: concatenate sub-vectors, or build from scalar elements.
    if (PartTy.isVector())
      MIRBuilder.buildConcatVectors(DstReg, PartRegs);
    else
      MIRBuilder.buildBuildVector(DstReg, PartRegs);
    return;
  }

  unsigned PartSize = PartTy.getSizeInBits();
  unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();

  // Irregular breakdown: start from an undef value and chain G_INSERTs,
  // threading the partially-built value through CurResultReg while Offset
  // tracks the bit position of the next piece.
  Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
  MIRBuilder.buildUndef(CurResultReg);

  unsigned Offset = 0;
  for (Register PartReg : PartRegs) {
    Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
    MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
    CurResultReg = NewResultReg;
    Offset += PartSize;
  }

  // NOTE(review): this path assumes LeftoverRegs is non-empty whenever
  // LeftoverTy is valid — otherwise DstReg would never be defined. Confirm
  // against callers.
  for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
    // Use the original output register for the final insert to avoid a copy.
    Register NewResultReg = (I + 1 == E) ?
      DstReg : MRI.createGenericVirtualRegister(ResultTy);

    MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
    CurResultReg = NewResultReg;
    Offset += LeftoverPartSize;
  }
}
218 
219 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
220   switch (Opcode) {
221   case TargetOpcode::G_SDIV:
222     assert((Size == 32 || Size == 64) && "Unsupported size");
223     return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32;
224   case TargetOpcode::G_UDIV:
225     assert((Size == 32 || Size == 64) && "Unsupported size");
226     return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32;
227   case TargetOpcode::G_SREM:
228     assert((Size == 32 || Size == 64) && "Unsupported size");
229     return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
230   case TargetOpcode::G_UREM:
231     assert((Size == 32 || Size == 64) && "Unsupported size");
232     return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32;
233   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
234     assert(Size == 32 && "Unsupported size");
235     return RTLIB::CTLZ_I32;
236   case TargetOpcode::G_FADD:
237     assert((Size == 32 || Size == 64) && "Unsupported size");
238     return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
239   case TargetOpcode::G_FSUB:
240     assert((Size == 32 || Size == 64) && "Unsupported size");
241     return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
242   case TargetOpcode::G_FMUL:
243     assert((Size == 32 || Size == 64) && "Unsupported size");
244     return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
245   case TargetOpcode::G_FDIV:
246     assert((Size == 32 || Size == 64) && "Unsupported size");
247     return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
248   case TargetOpcode::G_FEXP:
249     assert((Size == 32 || Size == 64) && "Unsupported size");
250     return Size == 64 ? RTLIB::EXP_F64 : RTLIB::EXP_F32;
251   case TargetOpcode::G_FEXP2:
252     assert((Size == 32 || Size == 64) && "Unsupported size");
253     return Size == 64 ? RTLIB::EXP2_F64 : RTLIB::EXP2_F32;
254   case TargetOpcode::G_FREM:
255     return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
256   case TargetOpcode::G_FPOW:
257     return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
258   case TargetOpcode::G_FMA:
259     assert((Size == 32 || Size == 64) && "Unsupported size");
260     return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
261   case TargetOpcode::G_FSIN:
262     assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
263     return Size == 128 ? RTLIB::SIN_F128
264                        : Size == 64 ? RTLIB::SIN_F64 : RTLIB::SIN_F32;
265   case TargetOpcode::G_FCOS:
266     assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
267     return Size == 128 ? RTLIB::COS_F128
268                        : Size == 64 ? RTLIB::COS_F64 : RTLIB::COS_F32;
269   case TargetOpcode::G_FLOG10:
270     assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
271     return Size == 128 ? RTLIB::LOG10_F128
272                        : Size == 64 ? RTLIB::LOG10_F64 : RTLIB::LOG10_F32;
273   case TargetOpcode::G_FLOG:
274     assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
275     return Size == 128 ? RTLIB::LOG_F128
276                        : Size == 64 ? RTLIB::LOG_F64 : RTLIB::LOG_F32;
277   case TargetOpcode::G_FLOG2:
278     assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
279     return Size == 128 ? RTLIB::LOG2_F128
280                        : Size == 64 ? RTLIB::LOG2_F64 : RTLIB::LOG2_F32;
281   case TargetOpcode::G_FCEIL:
282     assert((Size == 32 || Size == 64) && "Unsupported size");
283     return Size == 64 ? RTLIB::CEIL_F64 : RTLIB::CEIL_F32;
284   case TargetOpcode::G_FFLOOR:
285     assert((Size == 32 || Size == 64) && "Unsupported size");
286     return Size == 64 ? RTLIB::FLOOR_F64 : RTLIB::FLOOR_F32;
287   }
288   llvm_unreachable("Unknown libcall function");
289 }
290 
291 LegalizerHelper::LegalizeResult
292 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
293                     const CallLowering::ArgInfo &Result,
294                     ArrayRef<CallLowering::ArgInfo> Args) {
295   auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
296   auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
297   const char *Name = TLI.getLibcallName(Libcall);
298 
299   MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
300 
301   CallLowering::CallLoweringInfo Info;
302   Info.CallConv = TLI.getLibcallCallingConv(Libcall);
303   Info.Callee = MachineOperand::CreateES(Name);
304   Info.OrigRet = Result;
305   std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
306   if (!CLI.lowerCall(MIRBuilder, Info))
307     return LegalizerHelper::UnableToLegalize;
308 
309   return LegalizerHelper::Legalized;
310 }
311 
312 // Useful for libcalls where all operands have the same type.
313 static LegalizerHelper::LegalizeResult
314 simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
315               Type *OpType) {
316   auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
317 
318   SmallVector<CallLowering::ArgInfo, 3> Args;
319   for (unsigned i = 1; i < MI.getNumOperands(); i++)
320     Args.push_back({MI.getOperand(i).getReg(), OpType});
321   return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
322                        Args);
323 }
324 
/// Replace a memcpy/memset/memmove intrinsic instruction with a call to the
/// corresponding runtime library function. Returns UnableToLegalize for any
/// other intrinsic ID or if call lowering fails.
LegalizerHelper::LegalizeResult
llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                       MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  // Marshal every operand after the intrinsic-ID operand (operand 0) as a
  // call argument.
  // NOTE(review): this forwards *all* remaining operands; if the intrinsic
  // carries trailing non-pointer/size operands at this point, they would be
  // passed to the libcall too — confirm against the IRTranslator's encoding.
  SmallVector<CallLowering::ArgInfo, 3> Args;
  for (unsigned i = 1; i < MI.getNumOperands(); i++) {
    Register Reg = MI.getOperand(i).getReg();

    // We need to derive an IR type for call lowering: i8* for pointer
    // operands (address space preserved), iN for everything else.
    LLT OpLLT = MRI.getType(Reg);
    Type *OpTy = nullptr;
    if (OpLLT.isPointer())
      OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
    else
      OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
    Args.push_back({Reg, OpTy});
  }

  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  Intrinsic::ID ID = MI.getOperand(0).getIntrinsicID();
  // Only the three memory intrinsics are supported.
  RTLIB::Libcall RTLibcall;
  switch (ID) {
  case Intrinsic::memcpy:
    RTLibcall = RTLIB::MEMCPY;
    break;
  case Intrinsic::memset:
    RTLibcall = RTLIB::MEMSET;
    break;
  case Intrinsic::memmove:
    RTLibcall = RTLIB::MEMMOVE;
    break;
  default:
    return LegalizerHelper::UnableToLegalize;
  }
  const char *Name = TLI.getLibcallName(RTLibcall);

  // Build the call at the position of the intrinsic instruction.
  MIRBuilder.setInstr(MI);
  MIRBuilder.getMF().getFrameInfo().setHasCalls(true);

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
  Info.Callee = MachineOperand::CreateES(Name);
  // The mem* libcalls' return value (the dest pointer) is ignored here.
  Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  return LegalizerHelper::Legalized;
}
377 
378 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
379                                        Type *FromType) {
380   auto ToMVT = MVT::getVT(ToType);
381   auto FromMVT = MVT::getVT(FromType);
382 
383   switch (Opcode) {
384   case TargetOpcode::G_FPEXT:
385     return RTLIB::getFPEXT(FromMVT, ToMVT);
386   case TargetOpcode::G_FPTRUNC:
387     return RTLIB::getFPROUND(FromMVT, ToMVT);
388   case TargetOpcode::G_FPTOSI:
389     return RTLIB::getFPTOSINT(FromMVT, ToMVT);
390   case TargetOpcode::G_FPTOUI:
391     return RTLIB::getFPTOUINT(FromMVT, ToMVT);
392   case TargetOpcode::G_SITOFP:
393     return RTLIB::getSINTTOFP(FromMVT, ToMVT);
394   case TargetOpcode::G_UITOFP:
395     return RTLIB::getUINTTOFP(FromMVT, ToMVT);
396   }
397   llvm_unreachable("Unsupported libcall function");
398 }
399 
400 static LegalizerHelper::LegalizeResult
401 conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
402                   Type *FromType) {
403   RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
404   return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
405                        {{MI.getOperand(1).getReg(), FromType}});
406 }
407 
/// Legalize \p MI by replacing it with a runtime library call. On success
/// the original instruction is erased. Each case below validates the
/// operand widths it can currently handle and bails out with
/// UnableToLegalize otherwise.
LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI) {
  // Width of the destination register selects the libcall variant.
  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = LLTy.getSizeInBits();
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  // Emit the call at the position of the instruction being replaced.
  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // Integer ops: all operands share one iN type of the destination width.
    Type *HLTy = IntegerType::get(Ctx, Size);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR: {
    // FP ops: only float (32) and double (64) are handled here; anything
    // wider (e.g. fp128) is rejected.
    if (Size > 64) {
      LLVM_DEBUG(dbgs() << "Size " << Size << " too large to legalize.\n");
      return UnableToLegalize;
    }
    Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPEXT: {
    // FIXME: Support other floating point types (half, fp128 etc)
    // Only float -> double is handled.
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 64 || FromSize != 32)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTRUNC: {
    // FIXME: Support other floating point types (half, fp128 etc)
    // Only double -> float is handled.
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 32 || FromSize != 64)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // FIXME: Support other types
    // FP (32/64) -> integer (32/64) conversions.
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
        FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // FIXME: Support other types
    // Integer (32/64) -> FP (32/64) conversions.
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
        FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  }

  // The libcall now defines the result; the original instruction is dead.
  MI.eraseFromParent();
  return Legalized;
}
515 
516 LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
517                                                               unsigned TypeIdx,
518                                                               LLT NarrowTy) {
519   MIRBuilder.setInstr(MI);
520 
521   uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
522   uint64_t NarrowSize = NarrowTy.getSizeInBits();
523 
524   switch (MI.getOpcode()) {
525   default:
526     return UnableToLegalize;
527   case TargetOpcode::G_IMPLICIT_DEF: {
528     // FIXME: add support for when SizeOp0 isn't an exact multiple of
529     // NarrowSize.
530     if (SizeOp0 % NarrowSize != 0)
531       return UnableToLegalize;
532     int NumParts = SizeOp0 / NarrowSize;
533 
534     SmallVector<Register, 2> DstRegs;
535     for (int i = 0; i < NumParts; ++i)
536       DstRegs.push_back(
537           MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());
538 
539     Register DstReg = MI.getOperand(0).getReg();
540     if(MRI.getType(DstReg).isVector())
541       MIRBuilder.buildBuildVector(DstReg, DstRegs);
542     else
543       MIRBuilder.buildMerge(DstReg, DstRegs);
544     MI.eraseFromParent();
545     return Legalized;
546   }
547   case TargetOpcode::G_CONSTANT: {
548     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
549     const APInt &Val = MI.getOperand(1).getCImm()->getValue();
550     unsigned TotalSize = Ty.getSizeInBits();
551     unsigned NarrowSize = NarrowTy.getSizeInBits();
552     int NumParts = TotalSize / NarrowSize;
553 
554     SmallVector<Register, 4> PartRegs;
555     for (int I = 0; I != NumParts; ++I) {
556       unsigned Offset = I * NarrowSize;
557       auto K = MIRBuilder.buildConstant(NarrowTy,
558                                         Val.lshr(Offset).trunc(NarrowSize));
559       PartRegs.push_back(K.getReg(0));
560     }
561 
562     LLT LeftoverTy;
563     unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
564     SmallVector<Register, 1> LeftoverRegs;
565     if (LeftoverBits != 0) {
566       LeftoverTy = LLT::scalar(LeftoverBits);
567       auto K = MIRBuilder.buildConstant(
568         LeftoverTy,
569         Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
570       LeftoverRegs.push_back(K.getReg(0));
571     }
572 
573     insertParts(MI.getOperand(0).getReg(),
574                 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
575 
576     MI.eraseFromParent();
577     return Legalized;
578   }
579   case TargetOpcode::G_SEXT: {
580     if (TypeIdx != 0)
581       return UnableToLegalize;
582 
583     if (NarrowTy.getSizeInBits() != SizeOp0 / 2) {
584       LLVM_DEBUG(dbgs() << "Can't narrow sext to type " << NarrowTy << "\n");
585       return UnableToLegalize;
586     }
587 
588     Register SrcReg = MI.getOperand(1).getReg();
589 
590     // Shift the sign bit of the low register through the high register.
591     auto ShiftAmt =
592         MIRBuilder.buildConstant(LLT::scalar(64), NarrowTy.getSizeInBits() - 1);
593     auto Shift = MIRBuilder.buildAShr(NarrowTy, SrcReg, ShiftAmt);
594     MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {SrcReg, Shift.getReg(0)});
595     MI.eraseFromParent();
596     return Legalized;
597   }
598 
599   case TargetOpcode::G_ADD: {
600     // FIXME: add support for when SizeOp0 isn't an exact multiple of
601     // NarrowSize.
602     if (SizeOp0 % NarrowSize != 0)
603       return UnableToLegalize;
604     // Expand in terms of carry-setting/consuming G_ADDE instructions.
605     int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
606 
607     SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
608     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
609     extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
610 
611     Register CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
612     MIRBuilder.buildConstant(CarryIn, 0);
613 
614     for (int i = 0; i < NumParts; ++i) {
615       Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
616       Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
617 
618       MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
619                             Src2Regs[i], CarryIn);
620 
621       DstRegs.push_back(DstReg);
622       CarryIn = CarryOut;
623     }
624     Register DstReg = MI.getOperand(0).getReg();
625     if(MRI.getType(DstReg).isVector())
626       MIRBuilder.buildBuildVector(DstReg, DstRegs);
627     else
628       MIRBuilder.buildMerge(DstReg, DstRegs);
629     MI.eraseFromParent();
630     return Legalized;
631   }
632   case TargetOpcode::G_SUB: {
633     // FIXME: add support for when SizeOp0 isn't an exact multiple of
634     // NarrowSize.
635     if (SizeOp0 % NarrowSize != 0)
636       return UnableToLegalize;
637 
638     int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
639 
640     SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
641     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
642     extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
643 
644     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
645     Register BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
646     MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut},
647                           {Src1Regs[0], Src2Regs[0]});
648     DstRegs.push_back(DstReg);
649     Register BorrowIn = BorrowOut;
650     for (int i = 1; i < NumParts; ++i) {
651       DstReg = MRI.createGenericVirtualRegister(NarrowTy);
652       BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
653 
654       MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut},
655                             {Src1Regs[i], Src2Regs[i], BorrowIn});
656 
657       DstRegs.push_back(DstReg);
658       BorrowIn = BorrowOut;
659     }
660     MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
661     MI.eraseFromParent();
662     return Legalized;
663   }
664   case TargetOpcode::G_MUL:
665   case TargetOpcode::G_UMULH:
666     return narrowScalarMul(MI, NarrowTy);
667   case TargetOpcode::G_EXTRACT:
668     return narrowScalarExtract(MI, TypeIdx, NarrowTy);
669   case TargetOpcode::G_INSERT:
670     return narrowScalarInsert(MI, TypeIdx, NarrowTy);
671   case TargetOpcode::G_LOAD: {
672     const auto &MMO = **MI.memoperands_begin();
673     Register DstReg = MI.getOperand(0).getReg();
674     LLT DstTy = MRI.getType(DstReg);
675     if (DstTy.isVector())
676       return UnableToLegalize;
677 
678     if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
679       Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
680       auto &MMO = **MI.memoperands_begin();
681       MIRBuilder.buildLoad(TmpReg, MI.getOperand(1).getReg(), MMO);
682       MIRBuilder.buildAnyExt(DstReg, TmpReg);
683       MI.eraseFromParent();
684       return Legalized;
685     }
686 
687     return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
688   }
689   case TargetOpcode::G_ZEXTLOAD:
690   case TargetOpcode::G_SEXTLOAD: {
691     bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
692     Register DstReg = MI.getOperand(0).getReg();
693     Register PtrReg = MI.getOperand(1).getReg();
694 
695     Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
696     auto &MMO = **MI.memoperands_begin();
697     if (MMO.getSizeInBits() == NarrowSize) {
698       MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
699     } else {
700       unsigned ExtLoad = ZExt ? TargetOpcode::G_ZEXTLOAD
701         : TargetOpcode::G_SEXTLOAD;
702       MIRBuilder.buildInstr(ExtLoad)
703         .addDef(TmpReg)
704         .addUse(PtrReg)
705         .addMemOperand(&MMO);
706     }
707 
708     if (ZExt)
709       MIRBuilder.buildZExt(DstReg, TmpReg);
710     else
711       MIRBuilder.buildSExt(DstReg, TmpReg);
712 
713     MI.eraseFromParent();
714     return Legalized;
715   }
716   case TargetOpcode::G_STORE: {
717     const auto &MMO = **MI.memoperands_begin();
718 
719     Register SrcReg = MI.getOperand(0).getReg();
720     LLT SrcTy = MRI.getType(SrcReg);
721     if (SrcTy.isVector())
722       return UnableToLegalize;
723 
724     int NumParts = SizeOp0 / NarrowSize;
725     unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
726     unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
727     if (SrcTy.isVector() && LeftoverBits != 0)
728       return UnableToLegalize;
729 
730     if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
731       Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
732       auto &MMO = **MI.memoperands_begin();
733       MIRBuilder.buildTrunc(TmpReg, SrcReg);
734       MIRBuilder.buildStore(TmpReg, MI.getOperand(1).getReg(), MMO);
735       MI.eraseFromParent();
736       return Legalized;
737     }
738 
739     return reduceLoadStoreWidth(MI, 0, NarrowTy);
740   }
741   case TargetOpcode::G_SELECT:
742     return narrowScalarSelect(MI, TypeIdx, NarrowTy);
743   case TargetOpcode::G_AND:
744   case TargetOpcode::G_OR:
745   case TargetOpcode::G_XOR: {
746     // Legalize bitwise operation:
747     // A = BinOp<Ty> B, C
748     // into:
749     // B1, ..., BN = G_UNMERGE_VALUES B
750     // C1, ..., CN = G_UNMERGE_VALUES C
751     // A1 = BinOp<Ty/N> B1, C2
752     // ...
753     // AN = BinOp<Ty/N> BN, CN
754     // A = G_MERGE_VALUES A1, ..., AN
755     return narrowScalarBasic(MI, TypeIdx, NarrowTy);
756   }
757   case TargetOpcode::G_SHL:
758   case TargetOpcode::G_LSHR:
759   case TargetOpcode::G_ASHR:
760     return narrowScalarShift(MI, TypeIdx, NarrowTy);
761   case TargetOpcode::G_CTLZ:
762   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
763   case TargetOpcode::G_CTTZ:
764   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
765   case TargetOpcode::G_CTPOP:
766     if (TypeIdx != 0)
767       return UnableToLegalize; // TODO
768 
769     Observer.changingInstr(MI);
770     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
771     Observer.changedInstr(MI);
772     return Legalized;
773   case TargetOpcode::G_INTTOPTR:
774     if (TypeIdx != 1)
775       return UnableToLegalize;
776 
777     Observer.changingInstr(MI);
778     narrowScalarSrc(MI, NarrowTy, 1);
779     Observer.changedInstr(MI);
780     return Legalized;
781   case TargetOpcode::G_PTRTOINT:
782     if (TypeIdx != 0)
783       return UnableToLegalize;
784 
785     Observer.changingInstr(MI);
786     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
787     Observer.changedInstr(MI);
788     return Legalized;
789   case TargetOpcode::G_PHI: {
790     unsigned NumParts = SizeOp0 / NarrowSize;
791     SmallVector<Register, 2> DstRegs;
792     SmallVector<SmallVector<Register, 2>, 2> SrcRegs;
793     DstRegs.resize(NumParts);
794     SrcRegs.resize(MI.getNumOperands() / 2);
795     Observer.changingInstr(MI);
796     for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
797       MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
798       MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
799       extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
800                    SrcRegs[i / 2]);
801     }
802     MachineBasicBlock &MBB = *MI.getParent();
803     MIRBuilder.setInsertPt(MBB, MI);
804     for (unsigned i = 0; i < NumParts; ++i) {
805       DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
806       MachineInstrBuilder MIB =
807           MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
808       for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
809         MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
810     }
811     MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
812     MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
813     Observer.changedInstr(MI);
814     MI.eraseFromParent();
815     return Legalized;
816   }
817   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
818   case TargetOpcode::G_INSERT_VECTOR_ELT: {
819     if (TypeIdx != 2)
820       return UnableToLegalize;
821 
822     int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
823     Observer.changingInstr(MI);
824     narrowScalarSrc(MI, NarrowTy, OpIdx);
825     Observer.changedInstr(MI);
826     return Legalized;
827   }
828   case TargetOpcode::G_ICMP: {
829     uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
830     if (NarrowSize * 2 != SrcSize)
831       return UnableToLegalize;
832 
833     Observer.changingInstr(MI);
834     Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
835     Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
836     MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2).getReg());
837 
838     Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
839     Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
840     MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3).getReg());
841 
842     CmpInst::Predicate Pred =
843         static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
844     LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
845 
846     if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
847       MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
848       MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
849       MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
850       MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
851       MIRBuilder.buildICmp(Pred, MI.getOperand(0).getReg(), Or, Zero);
852     } else {
853       MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
854       MachineInstrBuilder CmpHEQ =
855           MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
856       MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
857           ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
858       MIRBuilder.buildSelect(MI.getOperand(0).getReg(), CmpHEQ, CmpLU, CmpH);
859     }
860     Observer.changedInstr(MI);
861     MI.eraseFromParent();
862     return Legalized;
863   }
864   }
865 }
866 
867 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
868                                      unsigned OpIdx, unsigned ExtOpcode) {
869   MachineOperand &MO = MI.getOperand(OpIdx);
870   auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()});
871   MO.setReg(ExtB->getOperand(0).getReg());
872 }
873 
874 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
875                                       unsigned OpIdx) {
876   MachineOperand &MO = MI.getOperand(OpIdx);
877   auto ExtB = MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {NarrowTy},
878                                     {MO.getReg()});
879   MO.setReg(ExtB->getOperand(0).getReg());
880 }
881 
882 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
883                                      unsigned OpIdx, unsigned TruncOpcode) {
884   MachineOperand &MO = MI.getOperand(OpIdx);
885   Register DstExt = MRI.createGenericVirtualRegister(WideTy);
886   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
887   MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt});
888   MO.setReg(DstExt);
889 }
890 
891 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
892                                       unsigned OpIdx, unsigned ExtOpcode) {
893   MachineOperand &MO = MI.getOperand(OpIdx);
894   Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
895   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
896   MIRBuilder.buildInstr(ExtOpcode, {MO.getReg()}, {DstTrunc});
897   MO.setReg(DstTrunc);
898 }
899 
900 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
901                                             unsigned OpIdx) {
902   MachineOperand &MO = MI.getOperand(OpIdx);
903   Register DstExt = MRI.createGenericVirtualRegister(WideTy);
904   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
905   MIRBuilder.buildExtract(MO.getReg(), DstExt, 0);
906   MO.setReg(DstExt);
907 }
908 
909 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
910                                             unsigned OpIdx) {
911   MachineOperand &MO = MI.getOperand(OpIdx);
912 
913   LLT OldTy = MRI.getType(MO.getReg());
914   unsigned OldElts = OldTy.getNumElements();
915   unsigned NewElts = MoreTy.getNumElements();
916 
917   unsigned NumParts = NewElts / OldElts;
918 
919   // Use concat_vectors if the result is a multiple of the number of elements.
920   if (NumParts * OldElts == NewElts) {
921     SmallVector<Register, 8> Parts;
922     Parts.push_back(MO.getReg());
923 
924     Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
925     for (unsigned I = 1; I != NumParts; ++I)
926       Parts.push_back(ImpDef);
927 
928     auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
929     MO.setReg(Concat.getReg(0));
930     return;
931   }
932 
933   Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
934   Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
935   MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
936   MO.setReg(MoreReg);
937 }
938 
939 LegalizerHelper::LegalizeResult
940 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
941                                         LLT WideTy) {
942   if (TypeIdx != 1)
943     return UnableToLegalize;
944 
945   Register DstReg = MI.getOperand(0).getReg();
946   LLT DstTy = MRI.getType(DstReg);
947   if (DstTy.isVector())
948     return UnableToLegalize;
949 
950   Register Src1 = MI.getOperand(1).getReg();
951   LLT SrcTy = MRI.getType(Src1);
952   const int DstSize = DstTy.getSizeInBits();
953   const int SrcSize = SrcTy.getSizeInBits();
954   const int WideSize = WideTy.getSizeInBits();
955   const int NumMerge = (DstSize + WideSize - 1) / WideSize;
956 
957   unsigned NumOps = MI.getNumOperands();
958   unsigned NumSrc = MI.getNumOperands() - 1;
959   unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
960 
961   if (WideSize >= DstSize) {
962     // Directly pack the bits in the target type.
963     Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
964 
965     for (unsigned I = 2; I != NumOps; ++I) {
966       const unsigned Offset = (I - 1) * PartSize;
967 
968       Register SrcReg = MI.getOperand(I).getReg();
969       assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
970 
971       auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
972 
973       Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
974         MRI.createGenericVirtualRegister(WideTy);
975 
976       auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
977       auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
978       MIRBuilder.buildOr(NextResult, ResultReg, Shl);
979       ResultReg = NextResult;
980     }
981 
982     if (WideSize > DstSize)
983       MIRBuilder.buildTrunc(DstReg, ResultReg);
984     else if (DstTy.isPointer())
985       MIRBuilder.buildIntToPtr(DstReg, ResultReg);
986 
987     MI.eraseFromParent();
988     return Legalized;
989   }
990 
991   // Unmerge the original values to the GCD type, and recombine to the next
992   // multiple greater than the original type.
993   //
994   // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
995   // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
996   // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
997   // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
998   // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
999   // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1000   // %12:_(s12) = G_MERGE_VALUES %10, %11
1001   //
1002   // Padding with undef if necessary:
1003   //
1004   // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1005   // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1006   // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1007   // %7:_(s2) = G_IMPLICIT_DEF
1008   // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1009   // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1010   // %10:_(s12) = G_MERGE_VALUES %8, %9
1011 
1012   const int GCD = greatestCommonDivisor(SrcSize, WideSize);
1013   LLT GCDTy = LLT::scalar(GCD);
1014 
1015   SmallVector<Register, 8> Parts;
1016   SmallVector<Register, 8> NewMergeRegs;
1017   SmallVector<Register, 8> Unmerges;
1018   LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
1019 
1020   // Decompose the original operands if they don't evenly divide.
1021   for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
1022     Register SrcReg = MI.getOperand(I).getReg();
1023     if (GCD == SrcSize) {
1024       Unmerges.push_back(SrcReg);
1025     } else {
1026       auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
1027       for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1028         Unmerges.push_back(Unmerge.getReg(J));
1029     }
1030   }
1031 
1032   // Pad with undef to the next size that is a multiple of the requested size.
1033   if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
1034     Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
1035     for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
1036       Unmerges.push_back(UndefReg);
1037   }
1038 
1039   const int PartsPerGCD = WideSize / GCD;
1040 
1041   // Build merges of each piece.
1042   ArrayRef<Register> Slicer(Unmerges);
1043   for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1044     auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
1045     NewMergeRegs.push_back(Merge.getReg(0));
1046   }
1047 
1048   // A truncate may be necessary if the requested type doesn't evenly divide the
1049   // original result type.
1050   if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1051     MIRBuilder.buildMerge(DstReg, NewMergeRegs);
1052   } else {
1053     auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
1054     MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
1055   }
1056 
1057   MI.eraseFromParent();
1058   return Legalized;
1059 }
1060 
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
                                          LLT WideTy) {
  // Widen the results of a scalar G_UNMERGE_VALUES. The source is zero
  // extended to NumDst * WideTy bits, and shifted copies of it are OR'd in
  // so that original piece K ends up at bit offset K * WideSize; the unmerge
  // then produces WideTy pieces whose low bits are the original values (the
  // extra high bits of each piece are discarded by the truncs that
  // widenScalarDst inserts).
  if (TypeIdx != 0)
    return UnableToLegalize;

  // Operands are [Dst0...DstN-1, Src]; the source is the trailing operand.
  unsigned NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  if (!SrcTy.isScalar())
    return UnableToLegalize;

  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
  if (!DstTy.isScalar())
    return UnableToLegalize;

  // New source type holds NumDst pieces of the widened result type.
  unsigned NewSrcSize = NumDst * WideTy.getSizeInBits();
  LLT NewSrcTy = LLT::scalar(NewSrcSize);
  // Extra bits each piece gains when widened from DstTy to WideTy.
  unsigned SizeDiff = WideTy.getSizeInBits() - DstTy.getSizeInBits();

  auto WideSrc = MIRBuilder.buildZExt(NewSrcTy, SrcReg);

  // Cumulatively OR in shifted copies. After iteration I the value contains
  // copies of the original bits at every multiple of SizeDiff up to
  // I*(I+1)/2, which covers the K*SizeDiff offset needed for each piece K;
  // the stray copies only pollute bits that are truncated away below.
  for (unsigned I = 1; I != NumDst; ++I) {
    auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, SizeDiff * I);
    auto Shl = MIRBuilder.buildShl(NewSrcTy, WideSrc, ShiftAmt);
    WideSrc = MIRBuilder.buildOr(NewSrcTy, WideSrc, Shl);
  }

  Observer.changingInstr(MI);

  // Retarget the unmerge at the repacked source and widen every result.
  MI.getOperand(NumDst).setReg(WideSrc->getOperand(0).getReg());
  for (unsigned I = 0; I != NumDst; ++I)
    widenScalarDst(MI, WideTy, I);

  Observer.changedInstr(MI);

  return Legalized;
}
1100 
1101 LegalizerHelper::LegalizeResult
1102 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
1103                                     LLT WideTy) {
1104   Register DstReg = MI.getOperand(0).getReg();
1105   Register SrcReg = MI.getOperand(1).getReg();
1106   LLT SrcTy = MRI.getType(SrcReg);
1107 
1108   LLT DstTy = MRI.getType(DstReg);
1109   unsigned Offset = MI.getOperand(2).getImm();
1110 
1111   if (TypeIdx == 0) {
1112     if (SrcTy.isVector() || DstTy.isVector())
1113       return UnableToLegalize;
1114 
1115     SrcOp Src(SrcReg);
1116     if (SrcTy.isPointer()) {
1117       // Extracts from pointers can be handled only if they are really just
1118       // simple integers.
1119       const DataLayout &DL = MIRBuilder.getDataLayout();
1120       if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
1121         return UnableToLegalize;
1122 
1123       LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
1124       Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
1125       SrcTy = SrcAsIntTy;
1126     }
1127 
1128     if (DstTy.isPointer())
1129       return UnableToLegalize;
1130 
1131     if (Offset == 0) {
1132       // Avoid a shift in the degenerate case.
1133       MIRBuilder.buildTrunc(DstReg,
1134                             MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
1135       MI.eraseFromParent();
1136       return Legalized;
1137     }
1138 
1139     // Do a shift in the source type.
1140     LLT ShiftTy = SrcTy;
1141     if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1142       Src = MIRBuilder.buildAnyExt(WideTy, Src);
1143       ShiftTy = WideTy;
1144     } else if (WideTy.getSizeInBits() > SrcTy.getSizeInBits())
1145       return UnableToLegalize;
1146 
1147     auto LShr = MIRBuilder.buildLShr(
1148       ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
1149     MIRBuilder.buildTrunc(DstReg, LShr);
1150     MI.eraseFromParent();
1151     return Legalized;
1152   }
1153 
1154   if (SrcTy.isScalar()) {
1155     Observer.changingInstr(MI);
1156     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1157     Observer.changedInstr(MI);
1158     return Legalized;
1159   }
1160 
1161   if (!SrcTy.isVector())
1162     return UnableToLegalize;
1163 
1164   if (DstTy != SrcTy.getElementType())
1165     return UnableToLegalize;
1166 
1167   if (Offset % SrcTy.getScalarSizeInBits() != 0)
1168     return UnableToLegalize;
1169 
1170   Observer.changingInstr(MI);
1171   widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1172 
1173   MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
1174                           Offset);
1175   widenScalarDst(MI, WideTy.getScalarType(), 0);
1176   Observer.changedInstr(MI);
1177   return Legalized;
1178 }
1179 
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                   LLT WideTy) {
  // Widen a G_INSERT (TypeIdx 0 only): any-extend the container operand and
  // widen the result; the insert offset and the inserted value operand are
  // left untouched.
  if (TypeIdx != 0)
    return UnableToLegalize;
  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
  widenScalarDst(MI, WideTy);
  Observer.changedInstr(MI);
  return Legalized;
}
1191 
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
  // Legalize \p MI by performing the operation on the wider scalar type
  // \p WideTy, then converting back. Which operand set is widened, and with
  // which extension (any/zero/sign/fp), is opcode- and TypeIdx-specific.
  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_EXTRACT:
    return widenScalarExtract(MI, TypeIdx, WideTy);
  case TargetOpcode::G_INSERT:
    return widenScalarInsert(MI, TypeIdx, WideTy);
  case TargetOpcode::G_MERGE_VALUES:
    return widenScalarMergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UNMERGE_VALUES:
    return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO: {
    if (TypeIdx == 1)
      return UnableToLegalize; // TODO
    // Zero-extend both inputs so the carry/borrow is observable in the wide
    // result's high bits.
    auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
                                         {MI.getOperand(2).getReg()});
    auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
                                         {MI.getOperand(3).getReg()});
    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
                          ? TargetOpcode::G_ADD
                          : TargetOpcode::G_SUB;
    // Do the arithmetic in the larger type.
    auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
    LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
    APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits());
    // Mask to the original width; any set bits above it indicate overflow.
    auto AndOp = MIRBuilder.buildInstr(
        TargetOpcode::G_AND, {WideTy},
        {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
    // There is no overflow if the AndOp is the same as NewOp.
    MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp,
                         AndOp);
    // Now trunc the NewOp to the original result.
    MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP: {
    if (TypeIdx == 0) {
      Observer.changingInstr(MI);
      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    Register SrcReg = MI.getOperand(1).getReg();

    // First ZEXT the input.
    auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
    LLT CurTy = MRI.getType(SrcReg);
    if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
      // The count is the same in the larger type except if the original
      // value was zero.  This can be handled by setting the bit just off
      // the top of the original type.
      auto TopBit =
          APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
      MIBSrc = MIRBuilder.buildOr(
        WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
    }

    // Perform the operation at the larger size.
    auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
    // This is already the correct result for CTPOP and CTTZs
    if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
        MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
      // The correct result is NewOp - (Difference in widety and current ty).
      unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
      MIBNewOp = MIRBuilder.buildInstr(
          TargetOpcode::G_SUB, {WideTy},
          {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
    }

    MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_BSWAP: {
    Observer.changingInstr(MI);
    Register DstReg = MI.getOperand(0).getReg();

    Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
    Register DstExt = MRI.createGenericVirtualRegister(WideTy);
    Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

    MI.getOperand(0).setReg(DstExt);

    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

    // The wide bswap leaves the original bytes in the high end; shift them
    // back down by the width difference before truncating.
    LLT Ty = MRI.getType(DstReg);
    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
    MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
    MIRBuilder.buildInstr(TargetOpcode::G_LSHR)
      .addDef(ShrReg)
      .addUse(DstExt)
      .addUse(ShiftAmtReg);

    MIRBuilder.buildTrunc(DstReg, ShrReg);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SUB:
    // Perform operation at larger width (any extension is fine here, high bits
    // don't affect the result) and then truncate the result back to the
    // original type.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SHL:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
    // Signed operations need sign-extended inputs to preserve the result.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      // ASHR needs the sign bit preserved (sext); LSHR needs zeros shifted
      // in from the top (zext).
      unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
        TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;

      widenScalarSrc(MI, WideTy, 1, CvtOp);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
    // Unsigned operations need zero-extended inputs to preserve the result.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SELECT:
    Observer.changingInstr(MI);
    if (TypeIdx == 0) {
      // Perform operation at larger width (any extension is fine here, high
      // bits don't affect the result) and then truncate the result back to the
      // original type.
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
      // Explicit extension is required here since high bits affect the result.
      widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SITOFP:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_UITOFP:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
    // The memory operand keeps the original size; only the register type is
    // widened.
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_STORE: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    if (!isPowerOf2_32(Ty.getSizeInBits()))
      return UnableToLegalize;

    Observer.changingInstr(MI);

    // s1 values must be zero-extended so the stored byte has a defined
    // value; wider values can be any-extended since the store only writes
    // the low bits.
    unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
      TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
    widenScalarSrc(MI, WideTy, 0, ExtType);

    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    // Sign-extend the immediate to the wider type.
    const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
    Observer.changingInstr(MI);
    SrcMO.setCImm(ConstantInt::get(Ctx, Val));

    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FCONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    APFloat Val = SrcMO.getFPImm()->getValueAPF();
    bool LosesInfo;
    switch (WideTy.getSizeInBits()) {
    case 32:
      Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
                  &LosesInfo);
      break;
    case 64:
      Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
                  &LosesInfo);
      break;
    default:
      return UnableToLegalize;
    }

    assert(!LosesInfo && "extend should always be lossless");

    Observer.changingInstr(MI);
    SrcMO.setFPImm(ConstantFP::get(Ctx, Val));

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_IMPLICIT_DEF: {
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BRCOND:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FCMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ICMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      // Pick the extension matching the signedness of the predicate so the
      // comparison result is preserved.
      unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
                               MI.getOperand(1).getPredicate()))
                               ? TargetOpcode::G_SEXT
                               : TargetOpcode::G_ZEXT;
      widenScalarSrc(MI, WideTy, 2, ExtOpcode);
      widenScalarSrc(MI, WideTy, 3, ExtOpcode);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_GEP:
    assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
    Observer.changingInstr(MI);
    // Offsets are signed; sign-extend the index operand.
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_PHI: {
    assert(TypeIdx == 0 && "Expecting only Idx 0");

    Observer.changingInstr(MI);
    // Extend each incoming value in its predecessor block, before that
    // block's terminator.
    for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
    }

    // The truncate of the widened def must follow all PHIs in this block.
    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
    if (TypeIdx == 0) {
      Register VecReg = MI.getOperand(1).getReg();
      LLT VecTy = MRI.getType(VecReg);
      Observer.changingInstr(MI);

      // Widen the vector's element type to match the widened result.
      widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
                                     WideTy.getSizeInBits()),
                     1, TargetOpcode::G_SEXT);

      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    if (TypeIdx != 2)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FCANONICALIZE:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
    assert(TypeIdx == 0);
    Observer.changingInstr(MI);

    // FP ops: extend every source and truncate the result with FP
    // conversions.
    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INTTOPTR:
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRTOINT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_BUILD_VECTOR: {
    Observer.changingInstr(MI);

    const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
    for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);

    // Avoid changing the result vector type if the source element type was
    // requested.
    if (TypeIdx == 1) {
      auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
      MI.setDesc(TII.get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
    } else {
      widenScalarDst(MI, WideTy, 0);
    }

    Observer.changedInstr(MI);
    return Legalized;
  }
  }
}
1638 
/// Lower \p MI into a sequence of simpler generic instructions, when the
/// target marked this opcode/type combination as Lower.
///
/// \p TypeIdx and \p Ty identify the type index the lowering was requested
/// for. Returns Legalized on success, or UnableToLegalize when no lowering
/// strategy exists for the opcode (or the particular type shape, e.g. vectors
/// for G_FNEG).
LegalizerHelper::LegalizeResult
LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  using namespace TargetOpcode;
  MIRBuilder.setInstr(MI);

  switch(MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
    // Lower x % y into x - (x / y) * y, using the signedness-matching divide.
    Register QuotReg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
        .addDef(QuotReg)
        .addUse(MI.getOperand(1).getReg())
        .addUse(MI.getOperand(2).getReg());

    Register ProdReg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
    MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
                        ProdReg);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
    // result.
    Register Res = MI.getOperand(0).getReg();
    Register Overflow = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildMul(Res, LHS, RHS);

    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
                          ? TargetOpcode::G_SMULH
                          : TargetOpcode::G_UMULH;

    Register HiPart = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(Opcode)
      .addDef(HiPart)
      .addUse(LHS)
      .addUse(RHS);

    Register Zero = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildConstant(Zero, 0);

    // For *signed* multiply, overflow is detected by checking:
    // (hi != (lo >> bitwidth-1))
    if (Opcode == TargetOpcode::G_SMULH) {
      // Broadcast the sign bit of the low half and compare it with the actual
      // high half; any mismatch means the signed product did not fit.
      Register Shifted = MRI.createGenericVirtualRegister(Ty);
      Register ShiftAmt = MRI.createGenericVirtualRegister(Ty);
      MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
      MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
        .addDef(Shifted)
        .addUse(Res)
        .addUse(ShiftAmt);
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
    } else {
      // Unsigned multiply overflows iff the high half is non-zero.
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
    }
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FNEG: {
    // Lower G_FNEG X into G_FSUB Zero, X, where Zero is the zero value used
    // for negation of the matching IR floating-point type.
    // TODO: Handle vector types once we are able to
    // represent them.
    if (Ty.isVector())
      return UnableToLegalize;
    Register Res = MI.getOperand(0).getReg();
    Type *ZeroTy;
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    // Map the LLT bit width back to the corresponding IR FP type so a
    // ConstantFP can be built for it.
    switch (Ty.getSizeInBits()) {
    case 16:
      ZeroTy = Type::getHalfTy(Ctx);
      break;
    case 32:
      ZeroTy = Type::getFloatTy(Ctx);
      break;
    case 64:
      ZeroTy = Type::getDoubleTy(Ctx);
      break;
    case 128:
      ZeroTy = Type::getFP128Ty(Ctx);
      break;
    default:
      llvm_unreachable("unexpected floating-point type");
    }
    ConstantFP &ZeroForNegation =
        *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
    auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
    Register SubByReg = MI.getOperand(1).getReg();
    Register ZeroReg = Zero->getOperand(0).getReg();
    // Preserve the original fast-math flags on the replacement G_FSUB.
    MIRBuilder.buildInstr(TargetOpcode::G_FSUB, {Res}, {ZeroReg, SubByReg},
                          MI.getFlags());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FSUB: {
    // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
    // First, check if G_FNEG is marked as Lower. If so, we may
    // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
    if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
      return UnableToLegalize;
    Register Res = MI.getOperand(0).getReg();
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();
    Register Neg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
    MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Res}, {LHS, Neg}, MI.getFlags());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    // Lower to a plain G_ATOMIC_CMPXCHG plus an explicit compare of the loaded
    // old value against the expected value to recover the success flag.
    Register OldValRes = MI.getOperand(0).getReg();
    Register SuccessRes = MI.getOperand(1).getReg();
    Register Addr = MI.getOperand(2).getReg();
    Register CmpVal = MI.getOperand(3).getReg();
    Register NewVal = MI.getOperand(4).getReg();
    MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
                                  **MI.memoperands_begin());
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD: {
    // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
    Register DstReg = MI.getOperand(0).getReg();
    Register PtrReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    auto &MMO = **MI.memoperands_begin();

    if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
      if (MI.getOpcode() == TargetOpcode::G_LOAD) {
        // This load needs splitting into power of 2 sized loads.
        if (DstTy.isVector())
          return UnableToLegalize;
        if (isPowerOf2_32(DstTy.getSizeInBits()))
          return UnableToLegalize; // Don't know what we're being asked to do.

        // Our strategy here is to generate anyextending loads for the smaller
        // types up to next power-2 result type, and then combine the two larger
        // result values together, before truncating back down to the non-pow-2
        // type.
        // E.g. v1 = i24 load =>
        // v2 = i32 load (2 byte)
        // v3 = i32 load (1 byte)
        // v4 = i32 shl v3, 16
        // v5 = i32 or v4, v2
        // v1 = i24 trunc v5
        // By doing this we generate the correct truncate which should get
        // combined away as an artifact with a matching extend.
        uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
        uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;

        MachineFunction &MF = MIRBuilder.getMF();
        // The small piece lives at byte offset LargeSplitSize/8 from the
        // original memory operand.
        MachineMemOperand *LargeMMO =
            MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
        MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
            &MMO, LargeSplitSize / 8, SmallSplitSize / 8);

        LLT PtrTy = MRI.getType(PtrReg);
        unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
        LLT AnyExtTy = LLT::scalar(AnyExtSize);
        Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
        Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
        auto LargeLoad =
            MIRBuilder.buildLoad(LargeLdReg, PtrReg, *LargeMMO);

        auto OffsetCst =
            MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
        Register GEPReg = MRI.createGenericVirtualRegister(PtrTy);
        auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0));
        auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
                                              *SmallMMO);

        // Shift the high piece into position and OR the two halves together.
        auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
        auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
        auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
        MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
        MI.eraseFromParent();
        return Legalized;
      }
      // G_SEXTLOAD/G_ZEXTLOAD whose memory size matches the result: a plain
      // load already produces the full value.
      MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
      MI.eraseFromParent();
      return Legalized;
    }

    if (DstTy.isScalar()) {
      // Load at the memory width, then widen with the extension matching the
      // original opcode.
      Register TmpReg =
          MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
      switch (MI.getOpcode()) {
      default:
        llvm_unreachable("Unexpected opcode");
      case TargetOpcode::G_LOAD:
        MIRBuilder.buildAnyExt(DstReg, TmpReg);
        break;
      case TargetOpcode::G_SEXTLOAD:
        MIRBuilder.buildSExt(DstReg, TmpReg);
        break;
      case TargetOpcode::G_ZEXTLOAD:
        MIRBuilder.buildZExt(DstReg, TmpReg);
        break;
      }
      MI.eraseFromParent();
      return Legalized;
    }

    return UnableToLegalize;
  }
  case TargetOpcode::G_STORE: {
    // Lower a non-power of 2 store into multiple pow-2 stores.
    // E.g. split an i24 store into an i16 store + i8 store.
    // We do this by first extending the stored value to the next largest power
    // of 2 type, and then using truncating stores to store the components.
    // By doing this, likewise with G_LOAD, generate an extend that can be
    // artifact-combined away instead of leaving behind extracts.
    Register SrcReg = MI.getOperand(0).getReg();
    Register PtrReg = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    MachineMemOperand &MMO = **MI.memoperands_begin();
    if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
      return UnableToLegalize;
    if (SrcTy.isVector())
      return UnableToLegalize;
    if (isPowerOf2_32(SrcTy.getSizeInBits()))
      return UnableToLegalize; // Don't know what we're being asked to do.

    // Extend to the next pow-2.
    const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
    auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);

    // Obtain the smaller value by shifting away the larger value.
    uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
    uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
    auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
    auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);

    // Generate the GEP and truncating stores.
    LLT PtrTy = MRI.getType(PtrReg);
    auto OffsetCst =
        MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
    Register GEPReg = MRI.createGenericVirtualRegister(PtrTy);
    auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0));

    MachineFunction &MF = MIRBuilder.getMF();
    MachineMemOperand *LargeMMO =
        MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
    MachineMemOperand *SmallMMO =
        MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
    MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
    MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTPOP:
    return lowerBitCount(MI, TypeIdx, Ty);
  case G_UADDO: {
    // Lower to a plain add; unsigned overflow happened iff the sum wrapped,
    // i.e. the result compares unsigned-less-than one of the addends.
    Register Res = MI.getOperand(0).getReg();
    Register CarryOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildAdd(Res, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_UADDE: {
    // Add-with-carry: Res = LHS + RHS + zext(CarryIn).
    Register Res = MI.getOperand(0).getReg();
    Register CarryOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();
    Register CarryIn = MI.getOperand(4).getReg();

    Register TmpRes = MRI.createGenericVirtualRegister(Ty);
    Register ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);

    MIRBuilder.buildAdd(TmpRes, LHS, RHS);
    MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
    MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
    // NOTE(review): carry-out is computed as (Res <u LHS). This appears to
    // miss the carry when LHS + RHS wraps to ~0 and CarryIn wraps it again so
    // that Res == LHS (e.g. LHS = 0, RHS = ~0, CarryIn = 1) — verify.
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBO: {
    // Lower to a plain subtract; an unsigned borrow occurred iff LHS <u RHS.
    Register Res = MI.getOperand(0).getReg();
    Register BorrowOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildSub(Res, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBE: {
    // Subtract-with-borrow: Res = LHS - RHS - zext(BorrowIn). The borrow-out
    // is (LHS == RHS) ? BorrowIn : (LHS <u RHS): when the operands are equal
    // only the incoming borrow can underflow; otherwise the comparison of the
    // operands alone decides.
    Register Res = MI.getOperand(0).getReg();
    Register BorrowOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();
    Register BorrowIn = MI.getOperand(4).getReg();

    Register TmpRes = MRI.createGenericVirtualRegister(Ty);
    Register ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty);
    Register LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
    Register LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));

    MIRBuilder.buildSub(TmpRes, LHS, RHS);
    MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn);
    MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS);
    MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  // Remaining opcodes delegate to dedicated lowering helpers.
  case G_UITOFP:
    return lowerUITOFP(MI, TypeIdx, Ty);
  case G_SITOFP:
    return lowerSITOFP(MI, TypeIdx, Ty);
  case G_SMIN:
  case G_SMAX:
  case G_UMIN:
  case G_UMAX:
    return lowerMinMax(MI, TypeIdx, Ty);
  case G_FCOPYSIGN:
    return lowerFCopySign(MI, TypeIdx, Ty);
  case G_FMINNUM:
  case G_FMAXNUM:
    return lowerFMinNumMaxNum(MI);
  case G_UNMERGE_VALUES:
    return lowerUnmergeValues(MI);
  }
}
1985 
1986 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
1987     MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
1988   SmallVector<Register, 2> DstRegs;
1989 
1990   unsigned NarrowSize = NarrowTy.getSizeInBits();
1991   Register DstReg = MI.getOperand(0).getReg();
1992   unsigned Size = MRI.getType(DstReg).getSizeInBits();
1993   int NumParts = Size / NarrowSize;
1994   // FIXME: Don't know how to handle the situation where the small vectors
1995   // aren't all the same size yet.
1996   if (Size % NarrowSize != 0)
1997     return UnableToLegalize;
1998 
1999   for (int i = 0; i < NumParts; ++i) {
2000     Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
2001     MIRBuilder.buildUndef(TmpReg);
2002     DstRegs.push_back(TmpReg);
2003   }
2004 
2005   if (NarrowTy.isVector())
2006     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2007   else
2008     MIRBuilder.buildBuildVector(DstReg, DstRegs);
2009 
2010   MI.eraseFromParent();
2011   return Legalized;
2012 }
2013 
/// Split a simple vector operation — one whose result and every source share
/// the same type — into NarrowTy-sized pieces, applying the original opcode
/// to each piece and reassembling the result.
///
/// When NarrowTy does not evenly divide the type, exactly one leftover
/// element is tolerated and handled with an extract/op/insert sequence;
/// larger leftovers are rejected.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  const unsigned Opc = MI.getOpcode();
  // Number of source operands (everything past the single def).
  const unsigned NumOps = MI.getNumOperands() - 1;
  const unsigned NarrowSize = NarrowTy.getSizeInBits();
  const Register DstReg = MI.getOperand(0).getReg();
  const unsigned Flags = MI.getFlags();
  const LLT DstTy = MRI.getType(DstReg);
  const unsigned Size = DstTy.getSizeInBits();
  const int NumParts = Size / NarrowSize;
  const LLT EltTy = DstTy.getElementType();
  const unsigned EltSize = EltTy.getSizeInBits();
  const unsigned BitsForNumParts = NarrowSize * NumParts;

  // Check if we have any leftovers. If we do, then only handle the case where
  // the leftover is one element.
  if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size)
    return UnableToLegalize;

  if (BitsForNumParts != Size) {
    // Uneven case: build the result incrementally, starting from undef and
    // inserting each processed piece at its bit offset.
    Register AccumDstReg = MRI.createGenericVirtualRegister(DstTy);
    MIRBuilder.buildUndef(AccumDstReg);

    // Handle the pieces which evenly divide into the requested type with
    // extract/op/insert sequence.
    for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) {
      SmallVector<SrcOp, 4> SrcOps;
      for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
        Register PartOpReg = MRI.createGenericVirtualRegister(NarrowTy);
        MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), Offset);
        SrcOps.push_back(PartOpReg);
      }

      Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);

      Register PartInsertReg = MRI.createGenericVirtualRegister(DstTy);
      MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset);
      AccumDstReg = PartInsertReg;
    }

    // Handle the remaining element sized leftover piece.
    SmallVector<SrcOp, 4> SrcOps;
    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
      Register PartOpReg = MRI.createGenericVirtualRegister(EltTy);
      MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(),
                              BitsForNumParts);
      SrcOps.push_back(PartOpReg);
    }

    // The final insert writes directly into the original destination.
    Register PartDstReg = MRI.createGenericVirtualRegister(EltTy);
    MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
    MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts);
    MI.eraseFromParent();

    return Legalized;
  }

  // Even case: split every source into NumParts registers of NarrowTy.
  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;

  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs);

  if (NumOps >= 2)
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs);

  if (NumOps >= 3)
    extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs);

  // Emit one narrow operation per part, preserving the original flags.
  for (int i = 0; i < NumParts; ++i) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);

    if (NumOps == 1)
      MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags);
    else if (NumOps == 2) {
      MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags);
    } else if (NumOps == 3) {
      MIRBuilder.buildInstr(Opc, {DstReg},
                            {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags);
    }

    DstRegs.push_back(DstReg);
  }

  // Reassemble the original result from the per-part results.
  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
2106 
2107 // Handle splitting vector operations which need to have the same number of
2108 // elements in each type index, but each type index may have a different element
2109 // type.
2110 //
2111 // e.g.  <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
2112 //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2113 //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2114 //
2115 // Also handles some irregular breakdown cases, e.g.
2116 // e.g.  <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
2117 //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2118 //             s64 = G_SHL s64, s32
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMultiEltType(
  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
  // Only splitting driven by the result type (index 0) is supported.
  if (TypeIdx != 0)
    return UnableToLegalize;

  const LLT NarrowTy0 = NarrowTyArg;
  const unsigned NewNumElts =
      NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;

  const Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT LeftoverTy0;

  // All of the operands need to have the same number of elements, so if we can
  // determine a type breakdown for the result type, we can for all of the
  // source types.
  int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
  if (NumParts < 0)
    return UnableToLegalize;

  // Per-part instructions, built un-inserted so operands can be appended as
  // each source operand is split in turn; inserted at the end.
  SmallVector<MachineInstrBuilder, 4> NewInsts;

  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
  SmallVector<Register, 4> PartRegs, LeftoverRegs;

  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
    LLT LeftoverTy;
    Register SrcReg = MI.getOperand(I).getReg();
    LLT SrcTyI = MRI.getType(SrcReg);
    // Each operand keeps its own element type, but uses the same element
    // count per part as the result breakdown.
    LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
    LLT LeftoverTyI;

    // Split this operand into the requested typed registers, and any leftover
    // required to reproduce the original type.
    if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
                      LeftoverRegs))
      return UnableToLegalize;

    if (I == 1) {
      // For the first operand, create an instruction for each part and setup
      // the result.
      for (Register PartReg : PartRegs) {
        Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
                               .addDef(PartDstReg)
                               .addUse(PartReg));
        DstRegs.push_back(PartDstReg);
      }

      for (Register LeftoverReg : LeftoverRegs) {
        Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
                               .addDef(PartDstReg)
                               .addUse(LeftoverReg));
        LeftoverDstRegs.push_back(PartDstReg);
      }
    } else {
      assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());

      // Add the newly created operand splits to the existing instructions. The
      // odd-sized pieces are ordered after the requested NarrowTyArg sized
      // pieces.
      unsigned InstCount = 0;
      for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
        NewInsts[InstCount++].addUse(PartRegs[J]);
      for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
        NewInsts[InstCount++].addUse(LeftoverRegs[J]);
    }

    // Reuse the scratch vectors for the next operand.
    PartRegs.clear();
    LeftoverRegs.clear();
  }

  // Insert the newly built operations and rebuild the result register.
  for (auto &MIB : NewInsts)
    MIRBuilder.insertInstr(MIB);

  insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);

  MI.eraseFromParent();
  return Legalized;
}
2202 
/// Split a single-source, single-result vector operation (a cast-like opcode)
/// where the source and result have the same element count but possibly
/// different element types, into NumParts narrower operations.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  // Only result-driven (TypeIdx 0) splitting is handled.
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);

  // NarrowTy0: per-part result type; NarrowTy1: per-part source type.
  LLT NarrowTy0 = NarrowTy;
  LLT NarrowTy1;
  unsigned NumParts;

  if (NarrowTy.isVector()) {
    // Uneven breakdown not handled.
    NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
    if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
      return UnableToLegalize;

    // NOTE(review): the source part type is built with NumParts as its
    // element count rather than NarrowTy.getNumElements(); these only agree
    // when the breakdown is square — verify against callers.
    NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits());
  } else {
    // Full scalarization: one part per element of the result.
    NumParts = DstTy.getNumElements();
    NarrowTy1 = SrcTy.getElementType();
  }

  SmallVector<Register, 4> SrcRegs, DstRegs;
  extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);

  for (unsigned I = 0; I < NumParts; ++I) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode())
      .addDef(DstReg)
      .addUse(SrcRegs[I]);

    // Carry over fast-math/other instruction flags to each part.
    NewInst->setFlags(MI.getFlags());
    DstRegs.push_back(DstReg);
  }

  // Reassemble the original result from the per-part results.
  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
2251 
/// Split a vector G_ICMP/G_FCMP into narrower compares. The result and the
/// compared operands have different element types, so the non-driving side's
/// per-part type is derived from the driving side's breakdown.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  // Operand 1 is the predicate; operands 2 and 3 are the compared values.
  Register Src0Reg = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(Src0Reg);

  unsigned NumParts;
  // NarrowTy0: per-part result type; NarrowTy1: per-part source type.
  LLT NarrowTy0, NarrowTy1;

  if (TypeIdx == 0) {
    // Result-driven split: derive the source part type from NarrowTy's
    // element count (or scalarize fully when NarrowTy is scalar).
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = DstTy.getNumElements();

    NarrowTy0 = NarrowTy;
    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
    NarrowTy1 = NarrowTy.isVector() ?
      LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
      SrcTy.getElementType();

  } else {
    // Source-driven split: derive the result part type from NarrowTy's
    // element count, keeping the result's element size.
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = SrcTy.getNumElements();

    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
      NarrowTy.getNumElements();
    NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
                            DstTy.getScalarSizeInBits());
    NarrowTy1 = NarrowTy;
  }

  // FIXME: Don't know how to handle the situation where the small vectors
  // aren't all the same size yet.
  if (NarrowTy1.isVector() &&
      NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
    return UnableToLegalize;

  CmpInst::Predicate Pred
    = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());

  // Split both compared operands, then emit one compare per part.
  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);

  for (unsigned I = 0; I < NumParts; ++I) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    DstRegs.push_back(DstReg);

    if (MI.getOpcode() == TargetOpcode::G_ICMP)
      MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
    else {
      // FCMP additionally carries fast-math flags that must be preserved.
      MachineInstr *NewCmp
        = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
      NewCmp->setFlags(MI.getFlags());
    }
  }

  // Reassemble the original result from the per-part results.
  if (NarrowTy1.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
2318 
/// Split a vector G_SELECT into narrower selects. The condition may be a
/// scalar (one condition for the whole vector) or a vector (per-element
/// condition); the scalar condition is simply reused for every part.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
                                           LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register CondReg = MI.getOperand(1).getReg();

  unsigned NumParts = 0;
  // NarrowTy0: per-part result type; NarrowTy1: per-part condition type.
  LLT NarrowTy0, NarrowTy1;

  LLT DstTy = MRI.getType(DstReg);
  LLT CondTy = MRI.getType(CondReg);
  unsigned Size = DstTy.getSizeInBits();

  // TypeIdx 1 (condition-driven split) only makes sense for vector conditions.
  assert(TypeIdx == 0 || CondTy.isVector());

  if (TypeIdx == 0) {
    NarrowTy0 = NarrowTy;
    NarrowTy1 = CondTy;

    unsigned NarrowSize = NarrowTy0.getSizeInBits();
    // FIXME: Don't know how to handle the situation where the small vectors
    // aren't all the same size yet.
    if (Size % NarrowSize != 0)
      return UnableToLegalize;

    NumParts = Size / NarrowSize;

    // Need to break down the condition type
    if (CondTy.isVector()) {
      if (CondTy.getNumElements() == NumParts)
        NarrowTy1 = CondTy.getElementType();
      else
        NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
                                CondTy.getScalarSizeInBits());
    }
  } else {
    NumParts = CondTy.getNumElements();
    if (NarrowTy.isVector()) {
      // TODO: Handle uneven breakdown.
      if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
        return UnableToLegalize;

      // Vector narrowing of the condition is not implemented; note this
      // unconditional bail-out makes the even-breakdown check above dead.
      return UnableToLegalize;
    } else {
      // Fully scalarize: one select per condition element.
      NarrowTy0 = DstTy.getElementType();
      NarrowTy1 = NarrowTy;
    }
  }

  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
  // Only a vector condition needs splitting; a scalar condition is shared.
  if (CondTy.isVector())
    extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);

  extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);

  for (unsigned i = 0; i < NumParts; ++i) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
                           Src1Regs[i], Src2Regs[i]);
    DstRegs.push_back(DstReg);
  }

  // Reassemble the original result from the per-part results.
  if (NarrowTy0.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
2390 
/// Break a vector G_PHI into multiple narrower phis, splitting the incoming
/// values in their predecessor blocks and recombining the results.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  const Register DstReg = MI.getOperand(0).getReg();
  LLT PhiTy = MRI.getType(DstReg);
  LLT LeftoverTy;

  // All of the operands need to have the same number of elements, so if we can
  // determine a type breakdown for the result type, we can for all of the
  // source types.
  int NumParts, NumLeftover;
  std::tie(NumParts, NumLeftover)
    = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
  if (NumParts < 0)
    return UnableToLegalize;

  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
  SmallVector<MachineInstrBuilder, 4> NewInsts;

  const int TotalNumParts = NumParts + NumLeftover;

  // Insert the new phis in the result block first.
  for (int I = 0; I != TotalNumParts; ++I) {
    // NarrowTy pieces come first, then the odd-sized LeftoverTy pieces.
    LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
    Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
    // Defs only for now; the per-predecessor uses are added below.
    NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
                       .addDef(PartDstReg));
    if (I < NumParts)
      DstRegs.push_back(PartDstReg);
    else
      LeftoverDstRegs.push_back(PartDstReg);
  }

  // Recombine the narrow phi results into the original wide value. This must
  // be emitted after all phis, i.e. at the first non-phi instruction.
  MachineBasicBlock *MBB = MI.getParent();
  MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
  insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);

  SmallVector<Register, 4> PartRegs, LeftoverRegs;

  // Insert code to extract the incoming values in each predecessor block.
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
    PartRegs.clear();
    LeftoverRegs.clear();

    Register SrcReg = MI.getOperand(I).getReg();
    MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
    // The extracts must dominate the edge, so emit them before the
    // predecessor's terminator.
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());

    LLT Unused;
    if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
                      LeftoverRegs))
      return UnableToLegalize;

    // Add the newly created operand splits to the existing instructions. The
    // odd-sized pieces are ordered after the requested NarrowTyArg sized
    // pieces.
    for (int J = 0; J != TotalNumParts; ++J) {
      MachineInstrBuilder MIB = NewInsts[J];
      MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
      MIB.addMBB(&OpMBB);
    }
  }

  MI.eraseFromParent();
  return Legalized;
}
2457 
/// Split a non-atomic G_LOAD/G_STORE into multiple narrower memory accesses,
/// one per NarrowTy piece plus an optional odd-sized leftover piece.
LegalizerHelper::LegalizeResult
LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
                                      LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  MachineMemOperand *MMO = *MI.memoperands_begin();

  // This implementation doesn't work for atomics. Give up instead of doing
  // something invalid.
  if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
      MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
    return UnableToLegalize;

  bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
  Register ValReg = MI.getOperand(0).getReg();
  Register AddrReg = MI.getOperand(1).getReg();
  LLT ValTy = MRI.getType(ValReg);

  // Work out how many NarrowTy pieces (plus an optional LeftoverTy piece)
  // cover the value. For a store, the value register is also split here.
  int NumParts = -1;
  int NumLeftover = -1;
  LLT LeftoverTy;
  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
  if (IsLoad) {
    std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
  } else {
    if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
                     NarrowLeftoverRegs)) {
      NumParts = NarrowRegs.size();
      NumLeftover = NarrowLeftoverRegs.size();
    }
  }

  // NumParts is still -1 if the breakdown failed.
  if (NumParts == -1)
    return UnableToLegalize;

  // Offsets are materialized in an index type as wide as the pointer.
  const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());

  unsigned TotalSize = ValTy.getSizeInBits();

  // Split the load/store into PartTy sized pieces starting at Offset. If this
  // is a load, return the new registers in ValRegs. For a store, each elements
  // of ValRegs should be PartTy. Returns the next offset that needs to be
  // handled.
  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
                             unsigned Offset) -> unsigned {
    MachineFunction &MF = MIRBuilder.getMF();
    unsigned PartSize = PartTy.getSizeInBits();
    for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
         Offset += PartSize, ++Idx) {
      unsigned ByteSize = PartSize / 8;
      unsigned ByteOffset = Offset / 8;
      Register NewAddrReg;

      // Address of this piece: base pointer + ByteOffset.
      MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, ByteOffset);

      // Narrow the memory operand to match the piece.
      MachineMemOperand *NewMMO =
        MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);

      if (IsLoad) {
        Register Dst = MRI.createGenericVirtualRegister(PartTy);
        ValRegs.push_back(Dst);
        MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
      } else {
        MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
      }
    }

    return Offset;
  };

  unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);

  // Handle the rest of the register if this isn't an even type breakdown.
  if (LeftoverTy.isValid())
    splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);

  // For a load, stitch the loaded pieces back into the full-width value.
  if (IsLoad) {
    insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
                LeftoverTy, NarrowLeftoverRegs);
  }

  MI.eraseFromParent();
  return Legalized;
}
2544 
/// Dispatch FewerElements legalization to the per-opcode-family helpers.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  using namespace TargetOpcode;

  MIRBuilder.setInstr(MI);
  switch (MI.getOpcode()) {
  case G_IMPLICIT_DEF:
    return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
  // Simple ops with a single type index covering all operands and the result.
  case G_AND:
  case G_OR:
  case G_XOR:
  case G_ADD:
  case G_SUB:
  case G_MUL:
  case G_SMULH:
  case G_UMULH:
  case G_FADD:
  case G_FMUL:
  case G_FSUB:
  case G_FNEG:
  case G_FABS:
  case G_FCANONICALIZE:
  case G_FDIV:
  case G_FREM:
  case G_FMA:
  case G_FPOW:
  case G_FEXP:
  case G_FEXP2:
  case G_FLOG:
  case G_FLOG2:
  case G_FLOG10:
  case G_FNEARBYINT:
  case G_FCEIL:
  case G_FFLOOR:
  case G_FRINT:
  case G_INTRINSIC_ROUND:
  case G_INTRINSIC_TRUNC:
  case G_FCOS:
  case G_FSIN:
  case G_FSQRT:
  case G_BSWAP:
  case G_SDIV:
  case G_SMIN:
  case G_SMAX:
  case G_UMIN:
  case G_UMAX:
  case G_FMINNUM:
  case G_FMAXNUM:
  case G_FMINNUM_IEEE:
  case G_FMAXNUM_IEEE:
  case G_FMINIMUM:
  case G_FMAXIMUM:
    return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
  // Ops where the operands may have distinct element types.
  case G_SHL:
  case G_LSHR:
  case G_ASHR:
  case G_CTLZ:
  case G_CTLZ_ZERO_UNDEF:
  case G_CTTZ:
  case G_CTTZ_ZERO_UNDEF:
  case G_CTPOP:
  case G_FCOPYSIGN:
    return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
  // Conversions: source and destination vector types differ.
  case G_ZEXT:
  case G_SEXT:
  case G_ANYEXT:
  case G_FPEXT:
  case G_FPTRUNC:
  case G_SITOFP:
  case G_UITOFP:
  case G_FPTOSI:
  case G_FPTOUI:
  case G_INTTOPTR:
  case G_PTRTOINT:
  case G_ADDRSPACE_CAST:
    return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
  case G_ICMP:
  case G_FCMP:
    return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
  case G_SELECT:
    return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
  case G_PHI:
    return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
  case G_LOAD:
  case G_STORE:
    return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
  default:
    return UnableToLegalize;
  }
}
2636 
/// Expand a double-width shift by a compile-time-constant amount \p Amt into
/// operations on the two half-width pieces of the input.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
                                             const LLT HalfTy, const LLT AmtTy) {

  // Split the input value into its low and high halves.
  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());

  // A shift by zero is the identity; just reassemble the halves.
  if (Amt.isNullValue()) {
    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {InL, InH});
    MI.eraseFromParent();
    return Legalized;
  }

  LLT NVT = HalfTy;
  unsigned NVTBits = HalfTy.getSizeInBits();
  unsigned VTBits = 2 * NVTBits;

  SrcOp Lo(Register(0)), Hi(Register(0));
  if (MI.getOpcode() == TargetOpcode::G_SHL) {
    if (Amt.ugt(VTBits)) {
      // Everything is shifted out; the result is zero.
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      // Only low-half bits survive, landing in the high half.
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = MIRBuilder.buildShl(NVT, InL,
                               MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
    } else if (Amt == NVTBits) {
      // The low half moves wholesale into the high half.
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = InL;
    } else {
      // General case: Hi = (InH << Amt) | (InL >> (NVTBits - Amt)).
      Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrLHS =
          MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrRHS = MIRBuilder.buildLShr(
          NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
      Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
    }
  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    if (Amt.ugt(VTBits)) {
      // Everything is shifted out; the result is zero.
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      // Only high-half bits survive, landing in the low half.
      Lo = MIRBuilder.buildLShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt == NVTBits) {
      // The high half moves wholesale into the low half.
      Lo = InH;
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else {
      // General case: Lo = (InL >> Amt) | (InH << (NVTBits - Amt)).
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
    }
  } else {
    // G_ASHR: like G_LSHR, but fill with copies of the sign bit.
    if (Amt.ugt(VTBits)) {
      Hi = Lo = MIRBuilder.buildAShr(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt.ugt(NVTBits)) {
      Lo = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt == NVTBits) {
      // Low half is the high half; high half becomes all sign bits.
      Lo = InH;
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else {
      // General case: Lo = (InL >> Amt) | (InH << (NVTBits - Amt)).
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
    }
  }

  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {Lo.getReg(), Hi.getReg()});
  MI.eraseFromParent();

  return Legalized;
}
2724 
2725 // TODO: Optimize if constant shift amount.
2726 LegalizerHelper::LegalizeResult
2727 LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
2728                                    LLT RequestedTy) {
2729   if (TypeIdx == 1) {
2730     Observer.changingInstr(MI);
2731     narrowScalarSrc(MI, RequestedTy, 2);
2732     Observer.changedInstr(MI);
2733     return Legalized;
2734   }
2735 
2736   Register DstReg = MI.getOperand(0).getReg();
2737   LLT DstTy = MRI.getType(DstReg);
2738   if (DstTy.isVector())
2739     return UnableToLegalize;
2740 
2741   Register Amt = MI.getOperand(2).getReg();
2742   LLT ShiftAmtTy = MRI.getType(Amt);
2743   const unsigned DstEltSize = DstTy.getScalarSizeInBits();
2744   if (DstEltSize % 2 != 0)
2745     return UnableToLegalize;
2746 
2747   // Ignore the input type. We can only go to exactly half the size of the
2748   // input. If that isn't small enough, the resulting pieces will be further
2749   // legalized.
2750   const unsigned NewBitSize = DstEltSize / 2;
2751   const LLT HalfTy = LLT::scalar(NewBitSize);
2752   const LLT CondTy = LLT::scalar(1);
2753 
2754   if (const MachineInstr *KShiftAmt =
2755           getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
2756     return narrowScalarShiftByConstant(
2757         MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
2758   }
2759 
2760   // TODO: Expand with known bits.
2761 
2762   // Handle the fully general expansion by an unknown amount.
2763   auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
2764 
2765   Register InL = MRI.createGenericVirtualRegister(HalfTy);
2766   Register InH = MRI.createGenericVirtualRegister(HalfTy);
2767   MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());
2768 
2769   auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
2770   auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
2771 
2772   auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
2773   auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
2774   auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
2775 
2776   Register ResultRegs[2];
2777   switch (MI.getOpcode()) {
2778   case TargetOpcode::G_SHL: {
2779     // Short: ShAmt < NewBitSize
2780     auto LoS = MIRBuilder.buildShl(HalfTy, InH, Amt);
2781 
2782     auto OrLHS = MIRBuilder.buildShl(HalfTy, InH, Amt);
2783     auto OrRHS = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
2784     auto HiS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
2785 
2786     // Long: ShAmt >= NewBitSize
2787     auto LoL = MIRBuilder.buildConstant(HalfTy, 0);         // Lo part is zero.
2788     auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
2789 
2790     auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
2791     auto Hi = MIRBuilder.buildSelect(
2792         HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
2793 
2794     ResultRegs[0] = Lo.getReg(0);
2795     ResultRegs[1] = Hi.getReg(0);
2796     break;
2797   }
2798   case TargetOpcode::G_LSHR: {
2799     // Short: ShAmt < NewBitSize
2800     auto HiS = MIRBuilder.buildLShr(HalfTy, InH, Amt);
2801 
2802     auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
2803     auto OrRHS = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
2804     auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
2805 
2806     // Long: ShAmt >= NewBitSize
2807     auto HiL = MIRBuilder.buildConstant(HalfTy, 0);          // Hi part is zero.
2808     auto LoL = MIRBuilder.buildLShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
2809 
2810     auto Lo = MIRBuilder.buildSelect(
2811         HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
2812     auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
2813 
2814     ResultRegs[0] = Lo.getReg(0);
2815     ResultRegs[1] = Hi.getReg(0);
2816     break;
2817   }
2818   case TargetOpcode::G_ASHR: {
2819     // Short: ShAmt < NewBitSize
2820     auto HiS = MIRBuilder.buildAShr(HalfTy, InH, Amt);
2821 
2822     auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
2823     auto OrRHS = MIRBuilder.buildLShr(HalfTy, InH, AmtLack);
2824     auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
2825 
2826     // Long: ShAmt >= NewBitSize
2827 
2828     // Sign of Hi part.
2829     auto HiL = MIRBuilder.buildAShr(
2830         HalfTy, InH, MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1));
2831 
2832     auto LoL = MIRBuilder.buildAShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
2833 
2834     auto Lo = MIRBuilder.buildSelect(
2835         HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
2836 
2837     auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
2838 
2839     ResultRegs[0] = Lo.getReg(0);
2840     ResultRegs[1] = Hi.getReg(0);
2841     break;
2842   }
2843   default:
2844     llvm_unreachable("not a shift");
2845   }
2846 
2847   MIRBuilder.buildMerge(DstReg, ResultRegs);
2848   MI.eraseFromParent();
2849   return Legalized;
2850 }
2851 
2852 LegalizerHelper::LegalizeResult
2853 LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
2854                                        LLT MoreTy) {
2855   assert(TypeIdx == 0 && "Expecting only Idx 0");
2856 
2857   Observer.changingInstr(MI);
2858   for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
2859     MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2860     MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
2861     moreElementsVectorSrc(MI, MoreTy, I);
2862   }
2863 
2864   MachineBasicBlock &MBB = *MI.getParent();
2865   MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
2866   moreElementsVectorDst(MI, MoreTy, 0);
2867   Observer.changedInstr(MI);
2868   return Legalized;
2869 }
2870 
/// Dispatch MoreElements legalization: pad vector operands out to \p MoreTy
/// and shrink the result back, per opcode.
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                    LLT MoreTy) {
  MIRBuilder.setInstr(MI);
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_IMPLICIT_DEF:
  case TargetOpcode::G_LOAD: {
    // Result-only widening.
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_STORE:
    // Source-only widening of the stored value.
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX: {
    // Binary ops: widen both sources and the result.
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT:
    // Only the source vector (type index 1) can be widened.
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INSERT:
    // The big vector (source operand 1 and the result) share type index 0.
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_SELECT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    // Vector conditions would also need widening; not handled yet.
    if (MRI.getType(MI.getOperand(1).getReg()).isVector())
      return UnableToLegalize;

    // Widen both select values and the result; the scalar condition is kept.
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorSrc(MI, MoreTy, 3);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PHI:
    return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
  default:
    return UnableToLegalize;
  }
}
2940 
/// Multiply the split source registers as in schoolbook long multiplication.
/// For each result piece DstRegs[DstIdx], sum: the low halves of the partial
/// products that land at that position, the high halves of the partial
/// products from the previous position, and the carries accumulated while
/// summing the previous position.
void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
                                        ArrayRef<Register> Src1Regs,
                                        ArrayRef<Register> Src2Regs,
                                        LLT NarrowTy) {
  MachineIRBuilder &B = MIRBuilder;
  unsigned SrcParts = Src1Regs.size();
  unsigned DstParts = DstRegs.size();

  unsigned DstIdx = 0; // Low bits of the result.
  Register FactorSum =
      B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
  DstRegs[DstIdx] = FactorSum;

  // Carry total produced while summing the previous result piece; only valid
  // once DstIdx > 1.
  unsigned CarrySumPrevDstIdx;
  SmallVector<Register, 4> Factors;

  for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
    // Collect low parts of muls for DstIdx.
    // The index range keeps both DstIdx - i and i inside [0, SrcParts).
    for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
         i <= std::min(DstIdx, SrcParts - 1); ++i) {
      MachineInstrBuilder Mul =
          B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
      Factors.push_back(Mul.getReg(0));
    }
    // Collect high parts of muls from previous DstIdx.
    for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
         i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
      MachineInstrBuilder Umulh =
          B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
      Factors.push_back(Umulh.getReg(0));
    }
    // Add CarrySum from additions calculated for previous DstIdx.
    if (DstIdx != 1) {
      Factors.push_back(CarrySumPrevDstIdx);
    }

    Register CarrySum;
    // Add all factors and accumulate all carries into CarrySum.
    if (DstIdx != DstParts - 1) {
      MachineInstrBuilder Uaddo =
          B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
      FactorSum = Uaddo.getReg(0);
      CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i) {
        MachineInstrBuilder Uaddo =
            B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
        FactorSum = Uaddo.getReg(0);
        MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
        CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
      }
    } else {
      // Since value for the next index is not calculated, neither is CarrySum.
      // Carries out of the most significant piece are discarded, so plain
      // adds suffice here.
      FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i)
        FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
    }

    CarrySumPrevDstIdx = CarrySum;
    DstRegs[DstIdx] = FactorSum;
    Factors.clear();
  }
}
3003 
3004 LegalizerHelper::LegalizeResult
3005 LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
3006   Register DstReg = MI.getOperand(0).getReg();
3007   Register Src1 = MI.getOperand(1).getReg();
3008   Register Src2 = MI.getOperand(2).getReg();
3009 
3010   LLT Ty = MRI.getType(DstReg);
3011   if (Ty.isVector())
3012     return UnableToLegalize;
3013 
3014   unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
3015   unsigned DstSize = Ty.getSizeInBits();
3016   unsigned NarrowSize = NarrowTy.getSizeInBits();
3017   if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
3018     return UnableToLegalize;
3019 
3020   unsigned NumDstParts = DstSize / NarrowSize;
3021   unsigned NumSrcParts = SrcSize / NarrowSize;
3022   bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
3023   unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);
3024 
3025   SmallVector<Register, 2> Src1Parts, Src2Parts, DstTmpRegs;
3026   extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
3027   extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
3028   DstTmpRegs.resize(DstTmpParts);
3029   multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
3030 
3031   // Take only high half of registers if this is high mul.
3032   ArrayRef<Register> DstRegs(
3033       IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
3034   MIRBuilder.buildMerge(DstReg, DstRegs);
3035   MI.eraseFromParent();
3036   return Legalized;
3037 }
3038 
/// Narrow the source type (TypeIdx 1) of a G_EXTRACT: split the source into
/// NarrowTy pieces and assemble the extracted range from them.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
  // FIXME: add support for when SizeOp1 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp1 % NarrowSize != 0)
    return UnableToLegalize;
  int NumParts = SizeOp1 / NarrowSize;

  SmallVector<Register, 2> SrcRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  // OpReg/OpStart/OpSize describe the extracted range: OpSize bits starting
  // at bit OpStart within the source.
  Register OpReg = MI.getOperand(0).getReg();
  uint64_t OpStart = MI.getOperand(2).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int i = 0; i < NumParts; ++i) {
    unsigned SrcStart = i * NarrowSize;

    if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
      // No part of the extract uses this subregister, ignore it.
      continue;
    } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is extracted, forward the value.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset;
    uint64_t SegSize;
    if (OpStart < SrcStart) {
      // The piece begins inside the extracted range; take from its start.
      ExtractOffset = 0;
      SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
    } else {
      // The extracted range begins inside this piece.
      ExtractOffset = OpStart - SrcStart;
      SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
    }

    Register SegReg = SrcRegs[i];
    if (ExtractOffset != 0 || SegSize != NarrowSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
    }

    DstRegs.push_back(SegReg);
  }

  // Reassemble the collected segments into the destination.
  Register DstReg = MI.getOperand(0).getReg();
  if(MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else
    MIRBuilder.buildMerge(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}
3103 
/// Narrow the result type (TypeIdx 0) of a G_INSERT: split the big source
/// into NarrowTy pieces and splice the inserted value into the pieces it
/// overlaps.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  // FIXME: add support for when SizeOp0 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp0 % NarrowSize != 0)
    return UnableToLegalize;

  int NumParts = SizeOp0 / NarrowSize;

  SmallVector<Register, 2> SrcRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  // OpReg/OpStart/OpSize describe the inserted value: OpSize bits placed at
  // bit OpStart of the result.
  Register OpReg = MI.getOperand(2).getReg();
  uint64_t OpStart = MI.getOperand(3).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int i = 0; i < NumParts; ++i) {
    unsigned DstStart = i * NarrowSize;

    if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
      // No part of the insert affects this subregister, forward the original.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is defined by this insert, forward the new
      // value.
      DstRegs.push_back(OpReg);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset, InsertOffset;
    uint64_t SegSize;
    if (OpStart < DstStart) {
      // The piece begins inside the inserted value.
      InsertOffset = 0;
      ExtractOffset = DstStart - OpStart;
      SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
    } else {
      // The inserted value begins inside this piece.
      InsertOffset = OpStart - DstStart;
      ExtractOffset = 0;
      SegSize =
        std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
    }

    Register SegReg = OpReg;
    if (ExtractOffset != 0 || SegSize != OpSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
    }

    // Splice the segment into this piece of the original value.
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
    DstRegs.push_back(DstReg);
  }

  assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
  Register DstReg = MI.getOperand(0).getReg();
  if(MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else
    MIRBuilder.buildMerge(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}
3178 
3179 LegalizerHelper::LegalizeResult
3180 LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
3181                                    LLT NarrowTy) {
3182   Register DstReg = MI.getOperand(0).getReg();
3183   LLT DstTy = MRI.getType(DstReg);
3184 
3185   assert(MI.getNumOperands() == 3 && TypeIdx == 0);
3186 
3187   SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
3188   SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
3189   SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
3190   LLT LeftoverTy;
3191   if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
3192                     Src0Regs, Src0LeftoverRegs))
3193     return UnableToLegalize;
3194 
3195   LLT Unused;
3196   if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
3197                     Src1Regs, Src1LeftoverRegs))
3198     llvm_unreachable("inconsistent extractParts result");
3199 
3200   for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
3201     auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
3202                                         {Src0Regs[I], Src1Regs[I]});
3203     DstRegs.push_back(Inst->getOperand(0).getReg());
3204   }
3205 
3206   for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
3207     auto Inst = MIRBuilder.buildInstr(
3208       MI.getOpcode(),
3209       {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
3210     DstLeftoverRegs.push_back(Inst->getOperand(0).getReg());
3211   }
3212 
3213   insertParts(DstReg, DstTy, NarrowTy, DstRegs,
3214               LeftoverTy, DstLeftoverRegs);
3215 
3216   MI.eraseFromParent();
3217   return Legalized;
3218 }
3219 
3220 LegalizerHelper::LegalizeResult
3221 LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
3222                                     LLT NarrowTy) {
3223   if (TypeIdx != 0)
3224     return UnableToLegalize;
3225 
3226   Register CondReg = MI.getOperand(1).getReg();
3227   LLT CondTy = MRI.getType(CondReg);
3228   if (CondTy.isVector()) // TODO: Handle vselect
3229     return UnableToLegalize;
3230 
3231   Register DstReg = MI.getOperand(0).getReg();
3232   LLT DstTy = MRI.getType(DstReg);
3233 
3234   SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
3235   SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
3236   SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
3237   LLT LeftoverTy;
3238   if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
3239                     Src1Regs, Src1LeftoverRegs))
3240     return UnableToLegalize;
3241 
3242   LLT Unused;
3243   if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
3244                     Src2Regs, Src2LeftoverRegs))
3245     llvm_unreachable("inconsistent extractParts result");
3246 
3247   for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
3248     auto Select = MIRBuilder.buildSelect(NarrowTy,
3249                                          CondReg, Src1Regs[I], Src2Regs[I]);
3250     DstRegs.push_back(Select->getOperand(0).getReg());
3251   }
3252 
3253   for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
3254     auto Select = MIRBuilder.buildSelect(
3255       LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
3256     DstLeftoverRegs.push_back(Select->getOperand(0).getReg());
3257   }
3258 
3259   insertParts(DstReg, DstTy, NarrowTy, DstRegs,
3260               LeftoverTy, DstLeftoverRegs);
3261 
3262   MI.eraseFromParent();
3263   return Legalized;
3264 }
3265 
// Lower bit-count operations (G_CTLZ / G_CTTZ and their ZERO_UNDEF variants)
// in terms of other bit operations the target can handle.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  unsigned Opc = MI.getOpcode();
  auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
  // An opcode is usable as a lowering target if the legalizer will accept it
  // in some form: directly legal, as a libcall, or via custom legalization.
  auto isSupported = [this](const LegalityQuery &Q) {
    auto QAction = LI.getAction(Q).Action;
    return QAction == Legal || QAction == Libcall || QAction == Custom;
  };
  switch (Opc) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // This trivially expands to CTLZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTLZ: {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned Len = Ty.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) {
      // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
      // ctlz(0) is defined to be the bit width, so patch that case in.
      auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
                                             {Ty}, {SrcReg});
      auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
      auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
      auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                          SrcReg, MIBZero);
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
                             MIBCtlzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we do this:
    // NewLen = NextPowerOf2(Len);
    // x = x | (x >> 1);
    // x = x | (x >> 2);
    // ...
    // x = x | (x >>16);
    // x = x | (x >>32); // for 64-bit input
    // Upto NewLen/2
    // return Len - popcount(x);
    //
    // Ref: "Hacker's Delight" by Henry Warren
    Register Op = SrcReg;
    unsigned NewLen = PowerOf2Ceil(Len);
    // Smear the highest set bit into every lower position, doubling the
    // shift each iteration.
    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
      auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
      auto MIBOp = MIRBuilder.buildInstr(
          TargetOpcode::G_OR, {Ty},
          {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty},
                                     {Op, MIBShiftAmt})});
      Op = MIBOp->getOperand(0).getReg();
    }
    // After smearing, popcount(x) equals the number of non-leading-zero
    // bits, so Len - popcount(x) is the leading zero count.
    auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op});
    MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
                          {MIRBuilder.buildConstant(Ty, Len), MIBPop});
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    // This trivially expands to CTTZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTTZ: {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned Len = Ty.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) {
      // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
      // zero.
      auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
                                             {Ty}, {SrcReg});
      auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
      auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
      auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                          SrcReg, MIBZero);
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
                             MIBCttzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we use: { return popcount(~x & (x - 1)); }
    // unless the target has ctlz but not ctpop, in which case we use:
    // { return 32 - nlz(~x & (x-1)); }
    // Ref: "Hacker's Delight" by Henry Warren
    auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
    auto MIBNot =
        MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1});
    // ~x & (x - 1) sets exactly the bits below the lowest set bit of x.
    auto MIBTmp = MIRBuilder.buildInstr(
        TargetOpcode::G_AND, {Ty},
        {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
                                       {SrcReg, MIBCstNeg1})});
    if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) &&
        isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) {
      auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
      MIRBuilder.buildInstr(
          TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
          {MIBCstLen,
           MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})});
      MI.eraseFromParent();
      return Legalized;
    }
    // Reuse MI in place as a G_CTPOP of the below-lowest-set-bit mask.
    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
    MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
    return Legalized;
  }
  }
}
3378 
3379 // Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
3380 // representation.
3381 LegalizerHelper::LegalizeResult
3382 LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
3383   Register Dst = MI.getOperand(0).getReg();
3384   Register Src = MI.getOperand(1).getReg();
3385   const LLT S64 = LLT::scalar(64);
3386   const LLT S32 = LLT::scalar(32);
3387   const LLT S1 = LLT::scalar(1);
3388 
3389   assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
3390 
3391   // unsigned cul2f(ulong u) {
3392   //   uint lz = clz(u);
3393   //   uint e = (u != 0) ? 127U + 63U - lz : 0;
3394   //   u = (u << lz) & 0x7fffffffffffffffUL;
3395   //   ulong t = u & 0xffffffffffUL;
3396   //   uint v = (e << 23) | (uint)(u >> 40);
3397   //   uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
3398   //   return as_float(v + r);
3399   // }
3400 
3401   auto Zero32 = MIRBuilder.buildConstant(S32, 0);
3402   auto Zero64 = MIRBuilder.buildConstant(S64, 0);
3403 
3404   auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
3405 
3406   auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
3407   auto Sub = MIRBuilder.buildSub(S32, K, LZ);
3408 
3409   auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
3410   auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
3411 
3412   auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
3413   auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
3414 
3415   auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
3416 
3417   auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
3418   auto T = MIRBuilder.buildAnd(S64, U, Mask1);
3419 
3420   auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
3421   auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
3422   auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
3423 
3424   auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
3425   auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
3426   auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
3427   auto One = MIRBuilder.buildConstant(S32, 1);
3428 
3429   auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
3430   auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
3431   auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
3432   MIRBuilder.buildAdd(Dst, V, R);
3433 
3434   return Legalized;
3435 }
3436 
3437 LegalizerHelper::LegalizeResult
3438 LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
3439   Register Dst = MI.getOperand(0).getReg();
3440   Register Src = MI.getOperand(1).getReg();
3441   LLT DstTy = MRI.getType(Dst);
3442   LLT SrcTy = MRI.getType(Src);
3443 
3444   if (SrcTy != LLT::scalar(64))
3445     return UnableToLegalize;
3446 
3447   if (DstTy == LLT::scalar(32)) {
3448     // TODO: SelectionDAG has several alternative expansions to port which may
3449     // be more reasonble depending on the available instructions. If a target
3450     // has sitofp, does not have CTLZ, or can efficiently use f64 as an
3451     // intermediate type, this is probably worse.
3452     return lowerU64ToF32BitOps(MI);
3453   }
3454 
3455   return UnableToLegalize;
3456 }
3457 
3458 LegalizerHelper::LegalizeResult
3459 LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
3460   Register Dst = MI.getOperand(0).getReg();
3461   Register Src = MI.getOperand(1).getReg();
3462   LLT DstTy = MRI.getType(Dst);
3463   LLT SrcTy = MRI.getType(Src);
3464 
3465   const LLT S64 = LLT::scalar(64);
3466   const LLT S32 = LLT::scalar(32);
3467   const LLT S1 = LLT::scalar(1);
3468 
3469   if (SrcTy != S64)
3470     return UnableToLegalize;
3471 
3472   if (DstTy == S32) {
3473     // signed cl2f(long l) {
3474     //   long s = l >> 63;
3475     //   float r = cul2f((l + s) ^ s);
3476     //   return s ? -r : r;
3477     // }
3478     Register L = Src;
3479     auto SignBit = MIRBuilder.buildConstant(S64, 63);
3480     auto S = MIRBuilder.buildAShr(S64, L, SignBit);
3481 
3482     auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
3483     auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
3484     auto R = MIRBuilder.buildUITOFP(S32, Xor);
3485 
3486     auto RNeg = MIRBuilder.buildFNeg(S32, R);
3487     auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
3488                                             MIRBuilder.buildConstant(S64, 0));
3489     MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
3490     return Legalized;
3491   }
3492 
3493   return UnableToLegalize;
3494 }
3495 
3496 static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
3497   switch (Opc) {
3498   case TargetOpcode::G_SMIN:
3499     return CmpInst::ICMP_SLT;
3500   case TargetOpcode::G_SMAX:
3501     return CmpInst::ICMP_SGT;
3502   case TargetOpcode::G_UMIN:
3503     return CmpInst::ICMP_ULT;
3504   case TargetOpcode::G_UMAX:
3505     return CmpInst::ICMP_UGT;
3506   default:
3507     llvm_unreachable("not in integer min/max");
3508   }
3509 }
3510 
3511 LegalizerHelper::LegalizeResult
3512 LegalizerHelper::lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
3513   Register Dst = MI.getOperand(0).getReg();
3514   Register Src0 = MI.getOperand(1).getReg();
3515   Register Src1 = MI.getOperand(2).getReg();
3516 
3517   const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
3518   LLT CmpType = MRI.getType(Dst).changeElementSize(1);
3519 
3520   auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
3521   MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
3522 
3523   MI.eraseFromParent();
3524   return Legalized;
3525 }
3526 
3527 LegalizerHelper::LegalizeResult
3528 LegalizerHelper::lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
3529   Register Dst = MI.getOperand(0).getReg();
3530   Register Src0 = MI.getOperand(1).getReg();
3531   Register Src1 = MI.getOperand(2).getReg();
3532 
3533   const LLT Src0Ty = MRI.getType(Src0);
3534   const LLT Src1Ty = MRI.getType(Src1);
3535 
3536   const int Src0Size = Src0Ty.getScalarSizeInBits();
3537   const int Src1Size = Src1Ty.getScalarSizeInBits();
3538 
3539   auto SignBitMask = MIRBuilder.buildConstant(
3540     Src0Ty, APInt::getSignMask(Src0Size));
3541 
3542   auto NotSignBitMask = MIRBuilder.buildConstant(
3543     Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
3544 
3545   auto And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask);
3546   MachineInstr *Or;
3547 
3548   if (Src0Ty == Src1Ty) {
3549     auto And1 = MIRBuilder.buildAnd(Src1Ty, Src0, SignBitMask);
3550     Or = MIRBuilder.buildOr(Dst, And0, And1);
3551   } else if (Src0Size > Src1Size) {
3552     auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
3553     auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
3554     auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
3555     auto And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask);
3556     Or = MIRBuilder.buildOr(Dst, And0, And1);
3557   } else {
3558     auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
3559     auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
3560     auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
3561     auto And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask);
3562     Or = MIRBuilder.buildOr(Dst, And0, And1);
3563   }
3564 
3565   // Be careful about setting nsz/nnan/ninf on every instruction, since the
3566   // constants are a nan and -0.0, but the final result should preserve
3567   // everything.
3568   if (unsigned Flags = MI.getFlags())
3569     Or->setFlags(Flags);
3570 
3571   MI.eraseFromParent();
3572   return Legalized;
3573 }
3574 
3575 LegalizerHelper::LegalizeResult
3576 LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
3577   unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
3578     TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
3579 
3580   Register Dst = MI.getOperand(0).getReg();
3581   Register Src0 = MI.getOperand(1).getReg();
3582   Register Src1 = MI.getOperand(2).getReg();
3583   LLT Ty = MRI.getType(Dst);
3584 
3585   if (!MI.getFlag(MachineInstr::FmNoNans)) {
3586     // Insert canonicalizes if it's possible we need to quiet to get correct
3587     // sNaN behavior.
3588 
3589     // Note this must be done here, and not as an optimization combine in the
3590     // absence of a dedicate quiet-snan instruction as we're using an
3591     // omni-purpose G_FCANONICALIZE.
3592     if (!isKnownNeverSNaN(Src0, MRI))
3593       Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
3594 
3595     if (!isKnownNeverSNaN(Src1, MRI))
3596       Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
3597   }
3598 
3599   // If there are no nans, it's safe to simply replace this with the non-IEEE
3600   // version.
3601   MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
3602   MI.eraseFromParent();
3603   return Legalized;
3604 }
3605 
3606 LegalizerHelper::LegalizeResult
3607 LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
3608   const unsigned NumDst = MI.getNumOperands() - 1;
3609   const Register SrcReg = MI.getOperand(NumDst).getReg();
3610   LLT SrcTy = MRI.getType(SrcReg);
3611 
3612   Register Dst0Reg = MI.getOperand(0).getReg();
3613   LLT DstTy = MRI.getType(Dst0Reg);
3614 
3615 
3616   // Expand scalarizing unmerge as bitcast to integer and shift.
3617   if (!DstTy.isVector() && SrcTy.isVector() &&
3618       SrcTy.getElementType() == DstTy) {
3619     LLT IntTy = LLT::scalar(SrcTy.getSizeInBits());
3620     Register Cast = MIRBuilder.buildBitcast(IntTy, SrcReg).getReg(0);
3621 
3622     MIRBuilder.buildTrunc(Dst0Reg, Cast);
3623 
3624     const unsigned DstSize = DstTy.getSizeInBits();
3625     unsigned Offset = DstSize;
3626     for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
3627       auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
3628       auto Shift = MIRBuilder.buildLShr(IntTy, Cast, ShiftAmt);
3629       MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
3630     }
3631 
3632     MI.eraseFromParent();
3633     return Legalized;
3634   }
3635 
3636   return UnableToLegalize;
3637 }
3638