1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file implements the LegalizerHelper class to legalize
10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
11 /// primary legalization.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
16 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
17 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
18 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/CodeGen/TargetInstrInfo.h"
21 #include "llvm/CodeGen/TargetLowering.h"
22 #include "llvm/CodeGen/TargetSubtargetInfo.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
26 
27 #define DEBUG_TYPE "legalizer"
28 
29 using namespace llvm;
30 using namespace LegalizeActions;
31 
32 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
33 ///
34 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
35 /// with any leftover piece as type \p LeftoverTy
36 ///
37 /// Returns -1 if the breakdown is not satisfiable.
38 static int getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
39   assert(!LeftoverTy.isValid() && "this is an out argument");
40 
41   unsigned Size = OrigTy.getSizeInBits();
42   unsigned NarrowSize = NarrowTy.getSizeInBits();
43   unsigned NumParts = Size / NarrowSize;
44   unsigned LeftoverSize = Size - NumParts * NarrowSize;
45   assert(Size > NarrowSize);
46 
47   if (LeftoverSize == 0)
48     return NumParts;
49 
50   if (NarrowTy.isVector()) {
51     unsigned EltSize = OrigTy.getScalarSizeInBits();
52     if (LeftoverSize % EltSize != 0)
53       return -1;
54     LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
55   } else {
56     LeftoverTy = LLT::scalar(LeftoverSize);
57   }
58 
59   return NumParts;
60 }
61 
62 LegalizerHelper::LegalizerHelper(MachineFunction &MF,
63                                  GISelChangeObserver &Observer,
64                                  MachineIRBuilder &Builder)
65     : MIRBuilder(Builder), MRI(MF.getRegInfo()),
66       LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
67   MIRBuilder.setMF(MF);
68   MIRBuilder.setChangeObserver(Observer);
69 }
70 
71 LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
72                                  GISelChangeObserver &Observer,
73                                  MachineIRBuilder &B)
74     : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
75   MIRBuilder.setMF(MF);
76   MIRBuilder.setChangeObserver(Observer);
77 }
78 LegalizerHelper::LegalizeResult
79 LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
80   LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));
81 
82   auto Step = LI.getAction(MI, MRI);
83   switch (Step.Action) {
84   case Legal:
85     LLVM_DEBUG(dbgs() << ".. Already legal\n");
86     return AlreadyLegal;
87   case Libcall:
88     LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
89     return libcall(MI);
90   case NarrowScalar:
91     LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
92     return narrowScalar(MI, Step.TypeIdx, Step.NewType);
93   case WidenScalar:
94     LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
95     return widenScalar(MI, Step.TypeIdx, Step.NewType);
96   case Lower:
97     LLVM_DEBUG(dbgs() << ".. Lower\n");
98     return lower(MI, Step.TypeIdx, Step.NewType);
99   case FewerElements:
100     LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
101     return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
102   case MoreElements:
103     LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
104     return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
105   case Custom:
106     LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
107     return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
108                                                             : UnableToLegalize;
109   default:
110     LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
111     return UnableToLegalize;
112   }
113 }
114 
115 void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts,
116                                    SmallVectorImpl<unsigned> &VRegs) {
117   for (int i = 0; i < NumParts; ++i)
118     VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
119   MIRBuilder.buildUnmerge(VRegs, Reg);
120 }
121 
122 bool LegalizerHelper::extractParts(unsigned Reg, LLT RegTy,
123                                    LLT MainTy, LLT &LeftoverTy,
124                                    SmallVectorImpl<unsigned> &VRegs,
125                                    SmallVectorImpl<unsigned> &LeftoverRegs) {
126   assert(!LeftoverTy.isValid() && "this is an out argument");
127 
128   unsigned RegSize = RegTy.getSizeInBits();
129   unsigned MainSize = MainTy.getSizeInBits();
130   unsigned NumParts = RegSize / MainSize;
131   unsigned LeftoverSize = RegSize - NumParts * MainSize;
132 
133   // Use an unmerge when possible.
134   if (LeftoverSize == 0) {
135     for (unsigned I = 0; I < NumParts; ++I)
136       VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
137     MIRBuilder.buildUnmerge(VRegs, Reg);
138     return true;
139   }
140 
141   if (MainTy.isVector()) {
142     unsigned EltSize = MainTy.getScalarSizeInBits();
143     if (LeftoverSize % EltSize != 0)
144       return false;
145     LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
146   } else {
147     LeftoverTy = LLT::scalar(LeftoverSize);
148   }
149 
150   // For irregular sizes, extract the individual parts.
151   for (unsigned I = 0; I != NumParts; ++I) {
152     unsigned NewReg = MRI.createGenericVirtualRegister(MainTy);
153     VRegs.push_back(NewReg);
154     MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
155   }
156 
157   for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
158        Offset += LeftoverSize) {
159     unsigned NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
160     LeftoverRegs.push_back(NewReg);
161     MIRBuilder.buildExtract(NewReg, Reg, Offset);
162   }
163 
164   return true;
165 }
166 
167 void LegalizerHelper::insertParts(unsigned DstReg,
168                                   LLT ResultTy, LLT PartTy,
169                                   ArrayRef<unsigned> PartRegs,
170                                   LLT LeftoverTy,
171                                   ArrayRef<unsigned> LeftoverRegs) {
172   if (!LeftoverTy.isValid()) {
173     assert(LeftoverRegs.empty());
174 
175     if (!ResultTy.isVector()) {
176       MIRBuilder.buildMerge(DstReg, PartRegs);
177       return;
178     }
179 
180     if (PartTy.isVector())
181       MIRBuilder.buildConcatVectors(DstReg, PartRegs);
182     else
183       MIRBuilder.buildBuildVector(DstReg, PartRegs);
184     return;
185   }
186 
187   unsigned PartSize = PartTy.getSizeInBits();
188   unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();
189 
190   unsigned CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
191   MIRBuilder.buildUndef(CurResultReg);
192 
193   unsigned Offset = 0;
194   for (unsigned PartReg : PartRegs) {
195     unsigned NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
196     MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
197     CurResultReg = NewResultReg;
198     Offset += PartSize;
199   }
200 
201   for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
202     // Use the original output register for the final insert to avoid a copy.
203     unsigned NewResultReg = (I + 1 == E) ?
204       DstReg : MRI.createGenericVirtualRegister(ResultTy);
205 
206     MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
207     CurResultReg = NewResultReg;
208     Offset += LeftoverPartSize;
209   }
210 }
211 
212 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
213   switch (Opcode) {
214   case TargetOpcode::G_SDIV:
215     assert((Size == 32 || Size == 64) && "Unsupported size");
216     return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32;
217   case TargetOpcode::G_UDIV:
218     assert((Size == 32 || Size == 64) && "Unsupported size");
219     return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32;
220   case TargetOpcode::G_SREM:
221     assert((Size == 32 || Size == 64) && "Unsupported size");
222     return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
223   case TargetOpcode::G_UREM:
224     assert((Size == 32 || Size == 64) && "Unsupported size");
225     return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32;
226   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
227     assert(Size == 32 && "Unsupported size");
228     return RTLIB::CTLZ_I32;
229   case TargetOpcode::G_FADD:
230     assert((Size == 32 || Size == 64) && "Unsupported size");
231     return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
232   case TargetOpcode::G_FSUB:
233     assert((Size == 32 || Size == 64) && "Unsupported size");
234     return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
235   case TargetOpcode::G_FMUL:
236     assert((Size == 32 || Size == 64) && "Unsupported size");
237     return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
238   case TargetOpcode::G_FDIV:
239     assert((Size == 32 || Size == 64) && "Unsupported size");
240     return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
241   case TargetOpcode::G_FEXP:
242     assert((Size == 32 || Size == 64) && "Unsupported size");
243     return Size == 64 ? RTLIB::EXP_F64 : RTLIB::EXP_F32;
244   case TargetOpcode::G_FREM:
245     return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
246   case TargetOpcode::G_FPOW:
247     return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
248   case TargetOpcode::G_FMA:
249     assert((Size == 32 || Size == 64) && "Unsupported size");
250     return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
251   case TargetOpcode::G_FSIN:
252     assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
253     return Size == 128 ? RTLIB::SIN_F128
254                        : Size == 64 ? RTLIB::SIN_F64 : RTLIB::SIN_F32;
255   case TargetOpcode::G_FCOS:
256     assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
257     return Size == 128 ? RTLIB::COS_F128
258                        : Size == 64 ? RTLIB::COS_F64 : RTLIB::COS_F32;
259   case TargetOpcode::G_FLOG10:
260     assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
261     return Size == 128 ? RTLIB::LOG10_F128
262                        : Size == 64 ? RTLIB::LOG10_F64 : RTLIB::LOG10_F32;
263   case TargetOpcode::G_FLOG:
264     assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
265     return Size == 128 ? RTLIB::LOG_F128
266                        : Size == 64 ? RTLIB::LOG_F64 : RTLIB::LOG_F32;
267   case TargetOpcode::G_FLOG2:
268     assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
269     return Size == 128 ? RTLIB::LOG2_F128
270                        : Size == 64 ? RTLIB::LOG2_F64 : RTLIB::LOG2_F32;
271   }
272   llvm_unreachable("Unknown libcall function");
273 }
274 
275 LegalizerHelper::LegalizeResult
276 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
277                     const CallLowering::ArgInfo &Result,
278                     ArrayRef<CallLowering::ArgInfo> Args) {
279   auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
280   auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
281   const char *Name = TLI.getLibcallName(Libcall);
282 
283   MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
284   if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall),
285                      MachineOperand::CreateES(Name), Result, Args))
286     return LegalizerHelper::UnableToLegalize;
287 
288   return LegalizerHelper::Legalized;
289 }
290 
291 // Useful for libcalls where all operands have the same type.
292 static LegalizerHelper::LegalizeResult
293 simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
294               Type *OpType) {
295   auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
296 
297   SmallVector<CallLowering::ArgInfo, 3> Args;
298   for (unsigned i = 1; i < MI.getNumOperands(); i++)
299     Args.push_back({MI.getOperand(i).getReg(), OpType});
300   return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
301                        Args);
302 }
303 
304 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
305                                        Type *FromType) {
306   auto ToMVT = MVT::getVT(ToType);
307   auto FromMVT = MVT::getVT(FromType);
308 
309   switch (Opcode) {
310   case TargetOpcode::G_FPEXT:
311     return RTLIB::getFPEXT(FromMVT, ToMVT);
312   case TargetOpcode::G_FPTRUNC:
313     return RTLIB::getFPROUND(FromMVT, ToMVT);
314   case TargetOpcode::G_FPTOSI:
315     return RTLIB::getFPTOSINT(FromMVT, ToMVT);
316   case TargetOpcode::G_FPTOUI:
317     return RTLIB::getFPTOUINT(FromMVT, ToMVT);
318   case TargetOpcode::G_SITOFP:
319     return RTLIB::getSINTTOFP(FromMVT, ToMVT);
320   case TargetOpcode::G_UITOFP:
321     return RTLIB::getUINTTOFP(FromMVT, ToMVT);
322   }
323   llvm_unreachable("Unsupported libcall function");
324 }
325 
326 static LegalizerHelper::LegalizeResult
327 conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
328                   Type *FromType) {
329   RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
330   return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
331                        {{MI.getOperand(1).getReg(), FromType}});
332 }
333 
334 LegalizerHelper::LegalizeResult
335 LegalizerHelper::libcall(MachineInstr &MI) {
336   LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
337   unsigned Size = LLTy.getSizeInBits();
338   auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
339 
340   MIRBuilder.setInstr(MI);
341 
342   switch (MI.getOpcode()) {
343   default:
344     return UnableToLegalize;
345   case TargetOpcode::G_SDIV:
346   case TargetOpcode::G_UDIV:
347   case TargetOpcode::G_SREM:
348   case TargetOpcode::G_UREM:
349   case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
350     Type *HLTy = IntegerType::get(Ctx, Size);
351     auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
352     if (Status != Legalized)
353       return Status;
354     break;
355   }
356   case TargetOpcode::G_FADD:
357   case TargetOpcode::G_FSUB:
358   case TargetOpcode::G_FMUL:
359   case TargetOpcode::G_FDIV:
360   case TargetOpcode::G_FMA:
361   case TargetOpcode::G_FPOW:
362   case TargetOpcode::G_FREM:
363   case TargetOpcode::G_FCOS:
364   case TargetOpcode::G_FSIN:
365   case TargetOpcode::G_FLOG10:
366   case TargetOpcode::G_FLOG:
367   case TargetOpcode::G_FLOG2:
368   case TargetOpcode::G_FEXP: {
369     if (Size > 64) {
370       LLVM_DEBUG(dbgs() << "Size " << Size << " too large to legalize.\n");
371       return UnableToLegalize;
372     }
373     Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
374     auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
375     if (Status != Legalized)
376       return Status;
377     break;
378   }
379   case TargetOpcode::G_FPEXT: {
380     // FIXME: Support other floating point types (half, fp128 etc)
381     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
382     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
383     if (ToSize != 64 || FromSize != 32)
384       return UnableToLegalize;
385     LegalizeResult Status = conversionLibcall(
386         MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx));
387     if (Status != Legalized)
388       return Status;
389     break;
390   }
391   case TargetOpcode::G_FPTRUNC: {
392     // FIXME: Support other floating point types (half, fp128 etc)
393     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
394     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
395     if (ToSize != 32 || FromSize != 64)
396       return UnableToLegalize;
397     LegalizeResult Status = conversionLibcall(
398         MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx));
399     if (Status != Legalized)
400       return Status;
401     break;
402   }
403   case TargetOpcode::G_FPTOSI:
404   case TargetOpcode::G_FPTOUI: {
405     // FIXME: Support other types
406     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
407     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
408     if (ToSize != 32 || (FromSize != 32 && FromSize != 64))
409       return UnableToLegalize;
410     LegalizeResult Status = conversionLibcall(
411         MI, MIRBuilder, Type::getInt32Ty(Ctx),
412         FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
413     if (Status != Legalized)
414       return Status;
415     break;
416   }
417   case TargetOpcode::G_SITOFP:
418   case TargetOpcode::G_UITOFP: {
419     // FIXME: Support other types
420     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
421     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
422     if (FromSize != 32 || (ToSize != 32 && ToSize != 64))
423       return UnableToLegalize;
424     LegalizeResult Status = conversionLibcall(
425         MI, MIRBuilder,
426         ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
427         Type::getInt32Ty(Ctx));
428     if (Status != Legalized)
429       return Status;
430     break;
431   }
432   }
433 
434   MI.eraseFromParent();
435   return Legalized;
436 }
437 
438 LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
439                                                               unsigned TypeIdx,
440                                                               LLT NarrowTy) {
441   MIRBuilder.setInstr(MI);
442 
443   uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
444   uint64_t NarrowSize = NarrowTy.getSizeInBits();
445 
446   switch (MI.getOpcode()) {
447   default:
448     return UnableToLegalize;
449   case TargetOpcode::G_IMPLICIT_DEF: {
450     // FIXME: add support for when SizeOp0 isn't an exact multiple of
451     // NarrowSize.
452     if (SizeOp0 % NarrowSize != 0)
453       return UnableToLegalize;
454     int NumParts = SizeOp0 / NarrowSize;
455 
456     SmallVector<unsigned, 2> DstRegs;
457     for (int i = 0; i < NumParts; ++i)
458       DstRegs.push_back(
459           MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());
460 
461     unsigned DstReg = MI.getOperand(0).getReg();
462     if(MRI.getType(DstReg).isVector())
463       MIRBuilder.buildBuildVector(DstReg, DstRegs);
464     else
465       MIRBuilder.buildMerge(DstReg, DstRegs);
466     MI.eraseFromParent();
467     return Legalized;
468   }
469   case TargetOpcode::G_ADD: {
470     // FIXME: add support for when SizeOp0 isn't an exact multiple of
471     // NarrowSize.
472     if (SizeOp0 % NarrowSize != 0)
473       return UnableToLegalize;
474     // Expand in terms of carry-setting/consuming G_ADDE instructions.
475     int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
476 
477     SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
478     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
479     extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
480 
481     unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
482     MIRBuilder.buildConstant(CarryIn, 0);
483 
484     for (int i = 0; i < NumParts; ++i) {
485       unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
486       unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
487 
488       MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
489                             Src2Regs[i], CarryIn);
490 
491       DstRegs.push_back(DstReg);
492       CarryIn = CarryOut;
493     }
494     unsigned DstReg = MI.getOperand(0).getReg();
495     if(MRI.getType(DstReg).isVector())
496       MIRBuilder.buildBuildVector(DstReg, DstRegs);
497     else
498       MIRBuilder.buildMerge(DstReg, DstRegs);
499     MI.eraseFromParent();
500     return Legalized;
501   }
502   case TargetOpcode::G_SUB: {
503     // FIXME: add support for when SizeOp0 isn't an exact multiple of
504     // NarrowSize.
505     if (SizeOp0 % NarrowSize != 0)
506       return UnableToLegalize;
507 
508     int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
509 
510     SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
511     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
512     extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
513 
514     unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
515     unsigned BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
516     MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut},
517                           {Src1Regs[0], Src2Regs[0]});
518     DstRegs.push_back(DstReg);
519     unsigned BorrowIn = BorrowOut;
520     for (int i = 1; i < NumParts; ++i) {
521       DstReg = MRI.createGenericVirtualRegister(NarrowTy);
522       BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
523 
524       MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut},
525                             {Src1Regs[i], Src2Regs[i], BorrowIn});
526 
527       DstRegs.push_back(DstReg);
528       BorrowIn = BorrowOut;
529     }
530     MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
531     MI.eraseFromParent();
532     return Legalized;
533   }
534   case TargetOpcode::G_MUL:
535     return narrowScalarMul(MI, TypeIdx, NarrowTy);
536   case TargetOpcode::G_EXTRACT:
537     return narrowScalarExtract(MI, TypeIdx, NarrowTy);
538   case TargetOpcode::G_INSERT:
539     return narrowScalarInsert(MI, TypeIdx, NarrowTy);
540   case TargetOpcode::G_LOAD: {
541     const auto &MMO = **MI.memoperands_begin();
542     unsigned DstReg = MI.getOperand(0).getReg();
543     LLT DstTy = MRI.getType(DstReg);
544     if (DstTy.isVector())
545       return UnableToLegalize;
546 
547     if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
548       unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
549       auto &MMO = **MI.memoperands_begin();
550       MIRBuilder.buildLoad(TmpReg, MI.getOperand(1).getReg(), MMO);
551       MIRBuilder.buildAnyExt(DstReg, TmpReg);
552       MI.eraseFromParent();
553       return Legalized;
554     }
555 
556     return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
557   }
558   case TargetOpcode::G_ZEXTLOAD:
559   case TargetOpcode::G_SEXTLOAD: {
560     bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
561     unsigned DstReg = MI.getOperand(0).getReg();
562     unsigned PtrReg = MI.getOperand(1).getReg();
563 
564     unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
565     auto &MMO = **MI.memoperands_begin();
566     if (MMO.getSize() * 8 == NarrowSize) {
567       MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
568     } else {
569       unsigned ExtLoad = ZExt ? TargetOpcode::G_ZEXTLOAD
570         : TargetOpcode::G_SEXTLOAD;
571       MIRBuilder.buildInstr(ExtLoad)
572         .addDef(TmpReg)
573         .addUse(PtrReg)
574         .addMemOperand(&MMO);
575     }
576 
577     if (ZExt)
578       MIRBuilder.buildZExt(DstReg, TmpReg);
579     else
580       MIRBuilder.buildSExt(DstReg, TmpReg);
581 
582     MI.eraseFromParent();
583     return Legalized;
584   }
585   case TargetOpcode::G_STORE: {
586     const auto &MMO = **MI.memoperands_begin();
587 
588     unsigned SrcReg = MI.getOperand(0).getReg();
589     LLT SrcTy = MRI.getType(SrcReg);
590     if (SrcTy.isVector())
591       return UnableToLegalize;
592 
593     int NumParts = SizeOp0 / NarrowSize;
594     unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
595     unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
596     if (SrcTy.isVector() && LeftoverBits != 0)
597       return UnableToLegalize;
598 
599     if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
600       unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
601       auto &MMO = **MI.memoperands_begin();
602       MIRBuilder.buildTrunc(TmpReg, SrcReg);
603       MIRBuilder.buildStore(TmpReg, MI.getOperand(1).getReg(), MMO);
604       MI.eraseFromParent();
605       return Legalized;
606     }
607 
608     return reduceLoadStoreWidth(MI, 0, NarrowTy);
609   }
610   case TargetOpcode::G_CONSTANT: {
611     // FIXME: add support for when SizeOp0 isn't an exact multiple of
612     // NarrowSize.
613     if (SizeOp0 % NarrowSize != 0)
614       return UnableToLegalize;
615     int NumParts = SizeOp0 / NarrowSize;
616     const APInt &Cst = MI.getOperand(1).getCImm()->getValue();
617     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
618 
619     SmallVector<unsigned, 2> DstRegs;
620     for (int i = 0; i < NumParts; ++i) {
621       unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
622       ConstantInt *CI =
623           ConstantInt::get(Ctx, Cst.lshr(NarrowSize * i).trunc(NarrowSize));
624       MIRBuilder.buildConstant(DstReg, *CI);
625       DstRegs.push_back(DstReg);
626     }
627     unsigned DstReg = MI.getOperand(0).getReg();
628     if(MRI.getType(DstReg).isVector())
629       MIRBuilder.buildBuildVector(DstReg, DstRegs);
630     else
631       MIRBuilder.buildMerge(DstReg, DstRegs);
632     MI.eraseFromParent();
633     return Legalized;
634   }
635   case TargetOpcode::G_SELECT:
636     return narrowScalarSelect(MI, TypeIdx, NarrowTy);
637   case TargetOpcode::G_AND:
638   case TargetOpcode::G_OR:
639   case TargetOpcode::G_XOR: {
640     // Legalize bitwise operation:
641     // A = BinOp<Ty> B, C
642     // into:
643     // B1, ..., BN = G_UNMERGE_VALUES B
644     // C1, ..., CN = G_UNMERGE_VALUES C
645     // A1 = BinOp<Ty/N> B1, C2
646     // ...
647     // AN = BinOp<Ty/N> BN, CN
648     // A = G_MERGE_VALUES A1, ..., AN
649 
650     // FIXME: add support for when SizeOp0 isn't an exact multiple of
651     // NarrowSize.
652     if (SizeOp0 % NarrowSize != 0)
653       return UnableToLegalize;
654     int NumParts = SizeOp0 / NarrowSize;
655 
656     // List the registers where the destination will be scattered.
657     SmallVector<unsigned, 2> DstRegs;
658     // List the registers where the first argument will be split.
659     SmallVector<unsigned, 2> SrcsReg1;
660     // List the registers where the second argument will be split.
661     SmallVector<unsigned, 2> SrcsReg2;
662     // Create all the temporary registers.
663     for (int i = 0; i < NumParts; ++i) {
664       unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
665       unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy);
666       unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy);
667 
668       DstRegs.push_back(DstReg);
669       SrcsReg1.push_back(SrcReg1);
670       SrcsReg2.push_back(SrcReg2);
671     }
672     // Explode the big arguments into smaller chunks.
673     MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg());
674     MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg());
675 
676     // Do the operation on each small part.
677     for (int i = 0; i < NumParts; ++i)
678       MIRBuilder.buildInstr(MI.getOpcode(), {DstRegs[i]},
679                             {SrcsReg1[i], SrcsReg2[i]});
680 
681     // Gather the destination registers into the final destination.
682     unsigned DstReg = MI.getOperand(0).getReg();
683     if(MRI.getType(DstReg).isVector())
684       MIRBuilder.buildBuildVector(DstReg, DstRegs);
685     else
686       MIRBuilder.buildMerge(DstReg, DstRegs);
687     MI.eraseFromParent();
688     return Legalized;
689   }
690   case TargetOpcode::G_SHL:
691   case TargetOpcode::G_LSHR:
692   case TargetOpcode::G_ASHR:
693     return narrowScalarShift(MI, TypeIdx, NarrowTy);
694   case TargetOpcode::G_CTLZ:
695   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
696   case TargetOpcode::G_CTTZ:
697   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
698   case TargetOpcode::G_CTPOP:
699     if (TypeIdx != 0)
700       return UnableToLegalize; // TODO
701 
702     Observer.changingInstr(MI);
703     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
704     Observer.changedInstr(MI);
705     return Legalized;
706   case TargetOpcode::G_INTTOPTR:
707     if (TypeIdx != 1)
708       return UnableToLegalize;
709 
710     Observer.changingInstr(MI);
711     narrowScalarSrc(MI, NarrowTy, 1);
712     Observer.changedInstr(MI);
713     return Legalized;
714   case TargetOpcode::G_PTRTOINT:
715     if (TypeIdx != 0)
716       return UnableToLegalize;
717 
718     Observer.changingInstr(MI);
719     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
720     Observer.changedInstr(MI);
721     return Legalized;
722   }
723 }
724 
725 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
726                                      unsigned OpIdx, unsigned ExtOpcode) {
727   MachineOperand &MO = MI.getOperand(OpIdx);
728   auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()});
729   MO.setReg(ExtB->getOperand(0).getReg());
730 }
731 
732 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
733                                       unsigned OpIdx) {
734   MachineOperand &MO = MI.getOperand(OpIdx);
735   auto ExtB = MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {NarrowTy},
736                                     {MO.getReg()});
737   MO.setReg(ExtB->getOperand(0).getReg());
738 }
739 
740 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
741                                      unsigned OpIdx, unsigned TruncOpcode) {
742   MachineOperand &MO = MI.getOperand(OpIdx);
743   unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
744   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
745   MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt});
746   MO.setReg(DstExt);
747 }
748 
749 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
750                                       unsigned OpIdx, unsigned ExtOpcode) {
751   MachineOperand &MO = MI.getOperand(OpIdx);
752   unsigned DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
753   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
754   MIRBuilder.buildInstr(ExtOpcode, {MO.getReg()}, {DstTrunc});
755   MO.setReg(DstTrunc);
756 }
757 
758 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
759                                             unsigned OpIdx) {
760   MachineOperand &MO = MI.getOperand(OpIdx);
761   unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
762   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
763   MIRBuilder.buildExtract(MO.getReg(), DstExt, 0);
764   MO.setReg(DstExt);
765 }
766 
767 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
768                                             unsigned OpIdx) {
769   MachineOperand &MO = MI.getOperand(OpIdx);
770 
771   LLT OldTy = MRI.getType(MO.getReg());
772   unsigned OldElts = OldTy.getNumElements();
773   unsigned NewElts = MoreTy.getNumElements();
774 
775   unsigned NumParts = NewElts / OldElts;
776 
777   // Use concat_vectors if the result is a multiple of the number of elements.
778   if (NumParts * OldElts == NewElts) {
779     SmallVector<unsigned, 8> Parts;
780     Parts.push_back(MO.getReg());
781 
782     unsigned ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
783     for (unsigned I = 1; I != NumParts; ++I)
784       Parts.push_back(ImpDef);
785 
786     auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
787     MO.setReg(Concat.getReg(0));
788     return;
789   }
790 
791   unsigned MoreReg = MRI.createGenericVirtualRegister(MoreTy);
792   unsigned ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
793   MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
794   MO.setReg(MoreReg);
795 }
796 
797 LegalizerHelper::LegalizeResult
798 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
799                                         LLT WideTy) {
800   if (TypeIdx != 1)
801     return UnableToLegalize;
802 
803   unsigned DstReg = MI.getOperand(0).getReg();
804   LLT DstTy = MRI.getType(DstReg);
805   if (!DstTy.isScalar())
806     return UnableToLegalize;
807 
808   unsigned NumOps = MI.getNumOperands();
809   unsigned NumSrc = MI.getNumOperands() - 1;
810   unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
811 
812   unsigned Src1 = MI.getOperand(1).getReg();
813   unsigned ResultReg = MIRBuilder.buildZExt(DstTy, Src1)->getOperand(0).getReg();
814 
815   for (unsigned I = 2; I != NumOps; ++I) {
816     const unsigned Offset = (I - 1) * PartSize;
817 
818     unsigned SrcReg = MI.getOperand(I).getReg();
819     assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
820 
821     auto ZextInput = MIRBuilder.buildZExt(DstTy, SrcReg);
822 
823     unsigned NextResult = I + 1 == NumOps ? DstReg :
824       MRI.createGenericVirtualRegister(DstTy);
825 
826     auto ShiftAmt = MIRBuilder.buildConstant(DstTy, Offset);
827     auto Shl = MIRBuilder.buildShl(DstTy, ZextInput, ShiftAmt);
828     MIRBuilder.buildOr(NextResult, ResultReg, Shl);
829     ResultReg = NextResult;
830   }
831 
832   MI.eraseFromParent();
833   return Legalized;
834 }
835 
836 LegalizerHelper::LegalizeResult
837 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
838                                           LLT WideTy) {
839   if (TypeIdx != 0)
840     return UnableToLegalize;
841 
842   unsigned NumDst = MI.getNumOperands() - 1;
843   unsigned SrcReg = MI.getOperand(NumDst).getReg();
844   LLT SrcTy = MRI.getType(SrcReg);
845   if (!SrcTy.isScalar())
846     return UnableToLegalize;
847 
848   unsigned Dst0Reg = MI.getOperand(0).getReg();
849   LLT DstTy = MRI.getType(Dst0Reg);
850   if (!DstTy.isScalar())
851     return UnableToLegalize;
852 
853   unsigned NewSrcSize = NumDst * WideTy.getSizeInBits();
854   LLT NewSrcTy = LLT::scalar(NewSrcSize);
855   unsigned SizeDiff = WideTy.getSizeInBits() - DstTy.getSizeInBits();
856 
857   auto WideSrc = MIRBuilder.buildZExt(NewSrcTy, SrcReg);
858 
859   for (unsigned I = 1; I != NumDst; ++I) {
860     auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, SizeDiff * I);
861     auto Shl = MIRBuilder.buildShl(NewSrcTy, WideSrc, ShiftAmt);
862     WideSrc = MIRBuilder.buildOr(NewSrcTy, WideSrc, Shl);
863   }
864 
865   Observer.changingInstr(MI);
866 
867   MI.getOperand(NumDst).setReg(WideSrc->getOperand(0).getReg());
868   for (unsigned I = 0; I != NumDst; ++I)
869     widenScalarDst(MI, WideTy, I);
870 
871   Observer.changedInstr(MI);
872 
873   return Legalized;
874 }
875 
876 LegalizerHelper::LegalizeResult
877 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
878                                     LLT WideTy) {
879   unsigned DstReg = MI.getOperand(0).getReg();
880   unsigned SrcReg = MI.getOperand(1).getReg();
881   LLT SrcTy = MRI.getType(SrcReg);
882 
883   LLT DstTy = MRI.getType(DstReg);
884   unsigned Offset = MI.getOperand(2).getImm();
885 
886   if (TypeIdx == 0) {
887     if (SrcTy.isVector() || DstTy.isVector())
888       return UnableToLegalize;
889 
890     SrcOp Src(SrcReg);
891     if (SrcTy.isPointer()) {
892       // Extracts from pointers can be handled only if they are really just
893       // simple integers.
894       const DataLayout &DL = MIRBuilder.getDataLayout();
895       if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
896         return UnableToLegalize;
897 
898       LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
899       Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
900       SrcTy = SrcAsIntTy;
901     }
902 
903     if (DstTy.isPointer())
904       return UnableToLegalize;
905 
906     if (Offset == 0) {
907       // Avoid a shift in the degenerate case.
908       MIRBuilder.buildTrunc(DstReg,
909                             MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
910       MI.eraseFromParent();
911       return Legalized;
912     }
913 
914     // Do a shift in the source type.
915     LLT ShiftTy = SrcTy;
916     if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
917       Src = MIRBuilder.buildAnyExt(WideTy, Src);
918       ShiftTy = WideTy;
919     } else if (WideTy.getSizeInBits() > SrcTy.getSizeInBits())
920       return UnableToLegalize;
921 
922     auto LShr = MIRBuilder.buildLShr(
923       ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
924     MIRBuilder.buildTrunc(DstReg, LShr);
925     MI.eraseFromParent();
926     return Legalized;
927   }
928 
929   if (!SrcTy.isVector())
930     return UnableToLegalize;
931 
932   if (DstTy != SrcTy.getElementType())
933     return UnableToLegalize;
934 
935   if (Offset % SrcTy.getScalarSizeInBits() != 0)
936     return UnableToLegalize;
937 
938   Observer.changingInstr(MI);
939   widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
940 
941   MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
942                           Offset);
943   widenScalarDst(MI, WideTy.getScalarType(), 0);
944   Observer.changedInstr(MI);
945   return Legalized;
946 }
947 
948 LegalizerHelper::LegalizeResult
949 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
950                                    LLT WideTy) {
951   if (TypeIdx != 0)
952     return UnableToLegalize;
953   Observer.changingInstr(MI);
954   widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
955   widenScalarDst(MI, WideTy);
956   Observer.changedInstr(MI);
957   return Legalized;
958 }
959 
960 LegalizerHelper::LegalizeResult
961 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
962   MIRBuilder.setInstr(MI);
963 
964   switch (MI.getOpcode()) {
965   default:
966     return UnableToLegalize;
967   case TargetOpcode::G_EXTRACT:
968     return widenScalarExtract(MI, TypeIdx, WideTy);
969   case TargetOpcode::G_INSERT:
970     return widenScalarInsert(MI, TypeIdx, WideTy);
971   case TargetOpcode::G_MERGE_VALUES:
972     return widenScalarMergeValues(MI, TypeIdx, WideTy);
973   case TargetOpcode::G_UNMERGE_VALUES:
974     return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
975   case TargetOpcode::G_UADDO:
976   case TargetOpcode::G_USUBO: {
977     if (TypeIdx == 1)
978       return UnableToLegalize; // TODO
979     auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
980                                          {MI.getOperand(2).getReg()});
981     auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
982                                          {MI.getOperand(3).getReg()});
983     unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
984                           ? TargetOpcode::G_ADD
985                           : TargetOpcode::G_SUB;
986     // Do the arithmetic in the larger type.
987     auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
988     LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
989     APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits());
990     auto AndOp = MIRBuilder.buildInstr(
991         TargetOpcode::G_AND, {WideTy},
992         {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
993     // There is no overflow if the AndOp is the same as NewOp.
994     MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp,
995                          AndOp);
996     // Now trunc the NewOp to the original result.
997     MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
998     MI.eraseFromParent();
999     return Legalized;
1000   }
1001   case TargetOpcode::G_CTTZ:
1002   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1003   case TargetOpcode::G_CTLZ:
1004   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1005   case TargetOpcode::G_CTPOP: {
1006     if (TypeIdx == 0) {
1007       Observer.changingInstr(MI);
1008       widenScalarDst(MI, WideTy, 0);
1009       Observer.changedInstr(MI);
1010       return Legalized;
1011     }
1012 
1013     unsigned SrcReg = MI.getOperand(1).getReg();
1014 
1015     // First ZEXT the input.
1016     auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
1017     LLT CurTy = MRI.getType(SrcReg);
1018     if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
1019       // The count is the same in the larger type except if the original
1020       // value was zero.  This can be handled by setting the bit just off
1021       // the top of the original type.
1022       auto TopBit =
1023           APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
1024       MIBSrc = MIRBuilder.buildOr(
1025         WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
1026     }
1027 
1028     // Perform the operation at the larger size.
1029     auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
1030     // This is already the correct result for CTPOP and CTTZs
1031     if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
1032         MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
1033       // The correct result is NewOp - (Difference in widety and current ty).
1034       unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
1035       MIBNewOp = MIRBuilder.buildInstr(
1036           TargetOpcode::G_SUB, {WideTy},
1037           {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
1038     }
1039 
1040     MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
1041     MI.eraseFromParent();
1042     return Legalized;
1043   }
1044   case TargetOpcode::G_BSWAP: {
1045     Observer.changingInstr(MI);
1046     unsigned DstReg = MI.getOperand(0).getReg();
1047 
1048     unsigned ShrReg = MRI.createGenericVirtualRegister(WideTy);
1049     unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
1050     unsigned ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
1051     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1052 
1053     MI.getOperand(0).setReg(DstExt);
1054 
1055     MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1056 
1057     LLT Ty = MRI.getType(DstReg);
1058     unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
1059     MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
1060     MIRBuilder.buildInstr(TargetOpcode::G_LSHR)
1061       .addDef(ShrReg)
1062       .addUse(DstExt)
1063       .addUse(ShiftAmtReg);
1064 
1065     MIRBuilder.buildTrunc(DstReg, ShrReg);
1066     Observer.changedInstr(MI);
1067     return Legalized;
1068   }
1069   case TargetOpcode::G_ADD:
1070   case TargetOpcode::G_AND:
1071   case TargetOpcode::G_MUL:
1072   case TargetOpcode::G_OR:
1073   case TargetOpcode::G_XOR:
1074   case TargetOpcode::G_SUB:
1075     // Perform operation at larger width (any extension is fines here, high bits
1076     // don't affect the result) and then truncate the result back to the
1077     // original type.
1078     Observer.changingInstr(MI);
1079     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1080     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1081     widenScalarDst(MI, WideTy);
1082     Observer.changedInstr(MI);
1083     return Legalized;
1084 
1085   case TargetOpcode::G_SHL:
1086       Observer.changingInstr(MI);
1087 
1088     if (TypeIdx == 0) {
1089       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1090       widenScalarDst(MI, WideTy);
1091     } else {
1092       assert(TypeIdx == 1);
1093       // The "number of bits to shift" operand must preserve its value as an
1094       // unsigned integer:
1095       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1096     }
1097 
1098     Observer.changedInstr(MI);
1099     return Legalized;
1100 
1101   case TargetOpcode::G_SDIV:
1102   case TargetOpcode::G_SREM:
1103     Observer.changingInstr(MI);
1104     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
1105     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1106     widenScalarDst(MI, WideTy);
1107     Observer.changedInstr(MI);
1108     return Legalized;
1109 
1110   case TargetOpcode::G_ASHR:
1111   case TargetOpcode::G_LSHR:
1112     Observer.changingInstr(MI);
1113 
1114     if (TypeIdx == 0) {
1115       unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
1116         TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
1117 
1118       widenScalarSrc(MI, WideTy, 1, CvtOp);
1119       widenScalarDst(MI, WideTy);
1120     } else {
1121       assert(TypeIdx == 1);
1122       // The "number of bits to shift" operand must preserve its value as an
1123       // unsigned integer:
1124       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1125     }
1126 
1127     Observer.changedInstr(MI);
1128     return Legalized;
1129   case TargetOpcode::G_UDIV:
1130   case TargetOpcode::G_UREM:
1131     Observer.changingInstr(MI);
1132     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1133     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1134     widenScalarDst(MI, WideTy);
1135     Observer.changedInstr(MI);
1136     return Legalized;
1137 
1138   case TargetOpcode::G_SELECT:
1139     Observer.changingInstr(MI);
1140     if (TypeIdx == 0) {
1141       // Perform operation at larger width (any extension is fine here, high
1142       // bits don't affect the result) and then truncate the result back to the
1143       // original type.
1144       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1145       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
1146       widenScalarDst(MI, WideTy);
1147     } else {
1148       bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
1149       // Explicit extension is required here since high bits affect the result.
1150       widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
1151     }
1152     Observer.changedInstr(MI);
1153     return Legalized;
1154 
1155   case TargetOpcode::G_FPTOSI:
1156   case TargetOpcode::G_FPTOUI:
1157     if (TypeIdx != 0)
1158       return UnableToLegalize;
1159     Observer.changingInstr(MI);
1160     widenScalarDst(MI, WideTy);
1161     Observer.changedInstr(MI);
1162     return Legalized;
1163 
1164   case TargetOpcode::G_SITOFP:
1165     if (TypeIdx != 1)
1166       return UnableToLegalize;
1167     Observer.changingInstr(MI);
1168     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
1169     Observer.changedInstr(MI);
1170     return Legalized;
1171 
1172   case TargetOpcode::G_UITOFP:
1173     if (TypeIdx != 1)
1174       return UnableToLegalize;
1175     Observer.changingInstr(MI);
1176     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1177     Observer.changedInstr(MI);
1178     return Legalized;
1179 
1180   case TargetOpcode::G_LOAD:
1181   case TargetOpcode::G_SEXTLOAD:
1182   case TargetOpcode::G_ZEXTLOAD:
1183     Observer.changingInstr(MI);
1184     widenScalarDst(MI, WideTy);
1185     Observer.changedInstr(MI);
1186     return Legalized;
1187 
1188   case TargetOpcode::G_STORE: {
1189     if (TypeIdx != 0)
1190       return UnableToLegalize;
1191 
1192     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1193     if (!isPowerOf2_32(Ty.getSizeInBits()))
1194       return UnableToLegalize;
1195 
1196     Observer.changingInstr(MI);
1197 
1198     unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
1199       TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
1200     widenScalarSrc(MI, WideTy, 0, ExtType);
1201 
1202     Observer.changedInstr(MI);
1203     return Legalized;
1204   }
1205   case TargetOpcode::G_CONSTANT: {
1206     MachineOperand &SrcMO = MI.getOperand(1);
1207     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
1208     const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
1209     Observer.changingInstr(MI);
1210     SrcMO.setCImm(ConstantInt::get(Ctx, Val));
1211 
1212     widenScalarDst(MI, WideTy);
1213     Observer.changedInstr(MI);
1214     return Legalized;
1215   }
1216   case TargetOpcode::G_FCONSTANT: {
1217     MachineOperand &SrcMO = MI.getOperand(1);
1218     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
1219     APFloat Val = SrcMO.getFPImm()->getValueAPF();
1220     bool LosesInfo;
1221     switch (WideTy.getSizeInBits()) {
1222     case 32:
1223       Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
1224                   &LosesInfo);
1225       break;
1226     case 64:
1227       Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
1228                   &LosesInfo);
1229       break;
1230     default:
1231       return UnableToLegalize;
1232     }
1233 
1234     assert(!LosesInfo && "extend should always be lossless");
1235 
1236     Observer.changingInstr(MI);
1237     SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
1238 
1239     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
1240     Observer.changedInstr(MI);
1241     return Legalized;
1242   }
1243   case TargetOpcode::G_IMPLICIT_DEF: {
1244     Observer.changingInstr(MI);
1245     widenScalarDst(MI, WideTy);
1246     Observer.changedInstr(MI);
1247     return Legalized;
1248   }
1249   case TargetOpcode::G_BRCOND:
1250     Observer.changingInstr(MI);
1251     widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
1252     Observer.changedInstr(MI);
1253     return Legalized;
1254 
1255   case TargetOpcode::G_FCMP:
1256     Observer.changingInstr(MI);
1257     if (TypeIdx == 0)
1258       widenScalarDst(MI, WideTy);
1259     else {
1260       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
1261       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
1262     }
1263     Observer.changedInstr(MI);
1264     return Legalized;
1265 
1266   case TargetOpcode::G_ICMP:
1267     Observer.changingInstr(MI);
1268     if (TypeIdx == 0)
1269       widenScalarDst(MI, WideTy);
1270     else {
1271       unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
1272                                MI.getOperand(1).getPredicate()))
1273                                ? TargetOpcode::G_SEXT
1274                                : TargetOpcode::G_ZEXT;
1275       widenScalarSrc(MI, WideTy, 2, ExtOpcode);
1276       widenScalarSrc(MI, WideTy, 3, ExtOpcode);
1277     }
1278     Observer.changedInstr(MI);
1279     return Legalized;
1280 
1281   case TargetOpcode::G_GEP:
1282     assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
1283     Observer.changingInstr(MI);
1284     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1285     Observer.changedInstr(MI);
1286     return Legalized;
1287 
1288   case TargetOpcode::G_PHI: {
1289     assert(TypeIdx == 0 && "Expecting only Idx 0");
1290 
1291     Observer.changingInstr(MI);
1292     for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
1293       MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
1294       MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
1295       widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
1296     }
1297 
1298     MachineBasicBlock &MBB = *MI.getParent();
1299     MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
1300     widenScalarDst(MI, WideTy);
1301     Observer.changedInstr(MI);
1302     return Legalized;
1303   }
1304   case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
1305     if (TypeIdx == 0) {
1306       unsigned VecReg = MI.getOperand(1).getReg();
1307       LLT VecTy = MRI.getType(VecReg);
1308       Observer.changingInstr(MI);
1309 
1310       widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
1311                                      WideTy.getSizeInBits()),
1312                      1, TargetOpcode::G_SEXT);
1313 
1314       widenScalarDst(MI, WideTy, 0);
1315       Observer.changedInstr(MI);
1316       return Legalized;
1317     }
1318 
1319     if (TypeIdx != 2)
1320       return UnableToLegalize;
1321     Observer.changingInstr(MI);
1322     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1323     Observer.changedInstr(MI);
1324     return Legalized;
1325   }
1326   case TargetOpcode::G_FADD:
1327   case TargetOpcode::G_FMUL:
1328   case TargetOpcode::G_FSUB:
1329   case TargetOpcode::G_FMA:
1330   case TargetOpcode::G_FNEG:
1331   case TargetOpcode::G_FABS:
1332   case TargetOpcode::G_FCANONICALIZE:
1333   case TargetOpcode::G_FDIV:
1334   case TargetOpcode::G_FREM:
1335   case TargetOpcode::G_FCEIL:
1336   case TargetOpcode::G_FFLOOR:
1337   case TargetOpcode::G_FCOS:
1338   case TargetOpcode::G_FSIN:
1339   case TargetOpcode::G_FLOG10:
1340   case TargetOpcode::G_FLOG:
1341   case TargetOpcode::G_FLOG2:
1342   case TargetOpcode::G_FSQRT:
1343   case TargetOpcode::G_FEXP:
1344     assert(TypeIdx == 0);
1345     Observer.changingInstr(MI);
1346 
1347     for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
1348       widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
1349 
1350     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
1351     Observer.changedInstr(MI);
1352     return Legalized;
1353   case TargetOpcode::G_INTTOPTR:
1354     if (TypeIdx != 1)
1355       return UnableToLegalize;
1356 
1357     Observer.changingInstr(MI);
1358     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1359     Observer.changedInstr(MI);
1360     return Legalized;
1361   case TargetOpcode::G_PTRTOINT:
1362     if (TypeIdx != 0)
1363       return UnableToLegalize;
1364 
1365     Observer.changingInstr(MI);
1366     widenScalarDst(MI, WideTy, 0);
1367     Observer.changedInstr(MI);
1368     return Legalized;
1369   }
1370 }
1371 
1372 LegalizerHelper::LegalizeResult
1373 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
1374   using namespace TargetOpcode;
1375   MIRBuilder.setInstr(MI);
1376 
1377   switch(MI.getOpcode()) {
1378   default:
1379     return UnableToLegalize;
1380   case TargetOpcode::G_SREM:
1381   case TargetOpcode::G_UREM: {
1382     unsigned QuotReg = MRI.createGenericVirtualRegister(Ty);
1383     MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
1384         .addDef(QuotReg)
1385         .addUse(MI.getOperand(1).getReg())
1386         .addUse(MI.getOperand(2).getReg());
1387 
1388     unsigned ProdReg = MRI.createGenericVirtualRegister(Ty);
1389     MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
1390     MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
1391                         ProdReg);
1392     MI.eraseFromParent();
1393     return Legalized;
1394   }
1395   case TargetOpcode::G_SMULO:
1396   case TargetOpcode::G_UMULO: {
1397     // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
1398     // result.
1399     unsigned Res = MI.getOperand(0).getReg();
1400     unsigned Overflow = MI.getOperand(1).getReg();
1401     unsigned LHS = MI.getOperand(2).getReg();
1402     unsigned RHS = MI.getOperand(3).getReg();
1403 
1404     MIRBuilder.buildMul(Res, LHS, RHS);
1405 
1406     unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
1407                           ? TargetOpcode::G_SMULH
1408                           : TargetOpcode::G_UMULH;
1409 
1410     unsigned HiPart = MRI.createGenericVirtualRegister(Ty);
1411     MIRBuilder.buildInstr(Opcode)
1412       .addDef(HiPart)
1413       .addUse(LHS)
1414       .addUse(RHS);
1415 
1416     unsigned Zero = MRI.createGenericVirtualRegister(Ty);
1417     MIRBuilder.buildConstant(Zero, 0);
1418 
1419     // For *signed* multiply, overflow is detected by checking:
1420     // (hi != (lo >> bitwidth-1))
1421     if (Opcode == TargetOpcode::G_SMULH) {
1422       unsigned Shifted = MRI.createGenericVirtualRegister(Ty);
1423       unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty);
1424       MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
1425       MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
1426         .addDef(Shifted)
1427         .addUse(Res)
1428         .addUse(ShiftAmt);
1429       MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
1430     } else {
1431       MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
1432     }
1433     MI.eraseFromParent();
1434     return Legalized;
1435   }
1436   case TargetOpcode::G_FNEG: {
1437     // TODO: Handle vector types once we are able to
1438     // represent them.
1439     if (Ty.isVector())
1440       return UnableToLegalize;
1441     unsigned Res = MI.getOperand(0).getReg();
1442     Type *ZeroTy;
1443     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
1444     switch (Ty.getSizeInBits()) {
1445     case 16:
1446       ZeroTy = Type::getHalfTy(Ctx);
1447       break;
1448     case 32:
1449       ZeroTy = Type::getFloatTy(Ctx);
1450       break;
1451     case 64:
1452       ZeroTy = Type::getDoubleTy(Ctx);
1453       break;
1454     case 128:
1455       ZeroTy = Type::getFP128Ty(Ctx);
1456       break;
1457     default:
1458       llvm_unreachable("unexpected floating-point type");
1459     }
1460     ConstantFP &ZeroForNegation =
1461         *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
1462     auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
1463     MIRBuilder.buildInstr(TargetOpcode::G_FSUB)
1464         .addDef(Res)
1465         .addUse(Zero->getOperand(0).getReg())
1466         .addUse(MI.getOperand(1).getReg());
1467     MI.eraseFromParent();
1468     return Legalized;
1469   }
1470   case TargetOpcode::G_FSUB: {
1471     // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
1472     // First, check if G_FNEG is marked as Lower. If so, we may
1473     // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
1474     if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
1475       return UnableToLegalize;
1476     unsigned Res = MI.getOperand(0).getReg();
1477     unsigned LHS = MI.getOperand(1).getReg();
1478     unsigned RHS = MI.getOperand(2).getReg();
1479     unsigned Neg = MRI.createGenericVirtualRegister(Ty);
1480     MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
1481     MIRBuilder.buildInstr(TargetOpcode::G_FADD)
1482         .addDef(Res)
1483         .addUse(LHS)
1484         .addUse(Neg);
1485     MI.eraseFromParent();
1486     return Legalized;
1487   }
1488   case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1489     unsigned OldValRes = MI.getOperand(0).getReg();
1490     unsigned SuccessRes = MI.getOperand(1).getReg();
1491     unsigned Addr = MI.getOperand(2).getReg();
1492     unsigned CmpVal = MI.getOperand(3).getReg();
1493     unsigned NewVal = MI.getOperand(4).getReg();
1494     MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
1495                                   **MI.memoperands_begin());
1496     MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
1497     MI.eraseFromParent();
1498     return Legalized;
1499   }
1500   case TargetOpcode::G_LOAD:
1501   case TargetOpcode::G_SEXTLOAD:
1502   case TargetOpcode::G_ZEXTLOAD: {
1503     // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
1504     unsigned DstReg = MI.getOperand(0).getReg();
1505     unsigned PtrReg = MI.getOperand(1).getReg();
1506     LLT DstTy = MRI.getType(DstReg);
1507     auto &MMO = **MI.memoperands_begin();
1508 
1509     if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) {
1510       // In the case of G_LOAD, this was a non-extending load already and we're
1511       // about to lower to the same instruction.
1512       if (MI.getOpcode() == TargetOpcode::G_LOAD)
1513           return UnableToLegalize;
1514       MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
1515       MI.eraseFromParent();
1516       return Legalized;
1517     }
1518 
1519     if (DstTy.isScalar()) {
1520       unsigned TmpReg = MRI.createGenericVirtualRegister(
1521           LLT::scalar(MMO.getSize() /* in bytes */ * 8));
1522       MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1523       switch (MI.getOpcode()) {
1524       default:
1525         llvm_unreachable("Unexpected opcode");
1526       case TargetOpcode::G_LOAD:
1527         MIRBuilder.buildAnyExt(DstReg, TmpReg);
1528         break;
1529       case TargetOpcode::G_SEXTLOAD:
1530         MIRBuilder.buildSExt(DstReg, TmpReg);
1531         break;
1532       case TargetOpcode::G_ZEXTLOAD:
1533         MIRBuilder.buildZExt(DstReg, TmpReg);
1534         break;
1535       }
1536       MI.eraseFromParent();
1537       return Legalized;
1538     }
1539 
1540     return UnableToLegalize;
1541   }
1542   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1543   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1544   case TargetOpcode::G_CTLZ:
1545   case TargetOpcode::G_CTTZ:
1546   case TargetOpcode::G_CTPOP:
1547     return lowerBitCount(MI, TypeIdx, Ty);
1548   case G_UADDO: {
1549     unsigned Res = MI.getOperand(0).getReg();
1550     unsigned CarryOut = MI.getOperand(1).getReg();
1551     unsigned LHS = MI.getOperand(2).getReg();
1552     unsigned RHS = MI.getOperand(3).getReg();
1553 
1554     MIRBuilder.buildAdd(Res, LHS, RHS);
1555     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
1556 
1557     MI.eraseFromParent();
1558     return Legalized;
1559   }
1560   case G_UADDE: {
1561     unsigned Res = MI.getOperand(0).getReg();
1562     unsigned CarryOut = MI.getOperand(1).getReg();
1563     unsigned LHS = MI.getOperand(2).getReg();
1564     unsigned RHS = MI.getOperand(3).getReg();
1565     unsigned CarryIn = MI.getOperand(4).getReg();
1566 
1567     unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
1568     unsigned ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);
1569 
1570     MIRBuilder.buildAdd(TmpRes, LHS, RHS);
1571     MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
1572     MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
1573     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
1574 
1575     MI.eraseFromParent();
1576     return Legalized;
1577   }
1578   case G_USUBO: {
1579     unsigned Res = MI.getOperand(0).getReg();
1580     unsigned BorrowOut = MI.getOperand(1).getReg();
1581     unsigned LHS = MI.getOperand(2).getReg();
1582     unsigned RHS = MI.getOperand(3).getReg();
1583 
1584     MIRBuilder.buildSub(Res, LHS, RHS);
1585     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
1586 
1587     MI.eraseFromParent();
1588     return Legalized;
1589   }
1590   case G_USUBE: {
1591     unsigned Res = MI.getOperand(0).getReg();
1592     unsigned BorrowOut = MI.getOperand(1).getReg();
1593     unsigned LHS = MI.getOperand(2).getReg();
1594     unsigned RHS = MI.getOperand(3).getReg();
1595     unsigned BorrowIn = MI.getOperand(4).getReg();
1596 
1597     unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
1598     unsigned ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty);
1599     unsigned LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
1600     unsigned LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
1601 
1602     MIRBuilder.buildSub(TmpRes, LHS, RHS);
1603     MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn);
1604     MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
1605     MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS);
1606     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS);
1607     MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
1608 
1609     MI.eraseFromParent();
1610     return Legalized;
1611   }
1612   }
1613 }
1614 
1615 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
1616     MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
1617   SmallVector<unsigned, 2> DstRegs;
1618 
1619   unsigned NarrowSize = NarrowTy.getSizeInBits();
1620   unsigned DstReg = MI.getOperand(0).getReg();
1621   unsigned Size = MRI.getType(DstReg).getSizeInBits();
1622   int NumParts = Size / NarrowSize;
1623   // FIXME: Don't know how to handle the situation where the small vectors
1624   // aren't all the same size yet.
1625   if (Size % NarrowSize != 0)
1626     return UnableToLegalize;
1627 
1628   for (int i = 0; i < NumParts; ++i) {
1629     unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1630     MIRBuilder.buildUndef(TmpReg);
1631     DstRegs.push_back(TmpReg);
1632   }
1633 
1634   if (NarrowTy.isVector())
1635     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
1636   else
1637     MIRBuilder.buildBuildVector(DstReg, DstRegs);
1638 
1639   MI.eraseFromParent();
1640   return Legalized;
1641 }
1642 
1643 LegalizerHelper::LegalizeResult
1644 LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
1645                                           LLT NarrowTy) {
1646   const unsigned Opc = MI.getOpcode();
1647   const unsigned NumOps = MI.getNumOperands() - 1;
1648   const unsigned NarrowSize = NarrowTy.getSizeInBits();
1649   const unsigned DstReg = MI.getOperand(0).getReg();
1650   const unsigned Flags = MI.getFlags();
1651   const LLT DstTy = MRI.getType(DstReg);
1652   const unsigned Size = DstTy.getSizeInBits();
1653   const int NumParts = Size / NarrowSize;
1654   const LLT EltTy = DstTy.getElementType();
1655   const unsigned EltSize = EltTy.getSizeInBits();
1656   const unsigned BitsForNumParts = NarrowSize * NumParts;
1657 
1658   // Check if we have any leftovers. If we do, then only handle the case where
1659   // the leftover is one element.
1660   if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size)
1661     return UnableToLegalize;
1662 
1663   if (BitsForNumParts != Size) {
1664     unsigned AccumDstReg = MRI.createGenericVirtualRegister(DstTy);
1665     MIRBuilder.buildUndef(AccumDstReg);
1666 
1667     // Handle the pieces which evenly divide into the requested type with
1668     // extract/op/insert sequence.
1669     for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) {
1670       SmallVector<SrcOp, 4> SrcOps;
1671       for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
1672         unsigned PartOpReg = MRI.createGenericVirtualRegister(NarrowTy);
1673         MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), Offset);
1674         SrcOps.push_back(PartOpReg);
1675       }
1676 
1677       unsigned PartDstReg = MRI.createGenericVirtualRegister(NarrowTy);
1678       MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
1679 
1680       unsigned PartInsertReg = MRI.createGenericVirtualRegister(DstTy);
1681       MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset);
1682       AccumDstReg = PartInsertReg;
1683     }
1684 
1685     // Handle the remaining element sized leftover piece.
1686     SmallVector<SrcOp, 4> SrcOps;
1687     for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
1688       unsigned PartOpReg = MRI.createGenericVirtualRegister(EltTy);
1689       MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(),
1690                               BitsForNumParts);
1691       SrcOps.push_back(PartOpReg);
1692     }
1693 
1694     unsigned PartDstReg = MRI.createGenericVirtualRegister(EltTy);
1695     MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
1696     MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts);
1697     MI.eraseFromParent();
1698 
1699     return Legalized;
1700   }
1701 
1702   SmallVector<unsigned, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
1703 
1704   extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs);
1705 
1706   if (NumOps >= 2)
1707     extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs);
1708 
1709   if (NumOps >= 3)
1710     extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs);
1711 
1712   for (int i = 0; i < NumParts; ++i) {
1713     unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
1714 
1715     if (NumOps == 1)
1716       MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags);
1717     else if (NumOps == 2) {
1718       MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags);
1719     } else if (NumOps == 3) {
1720       MIRBuilder.buildInstr(Opc, {DstReg},
1721                             {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags);
1722     }
1723 
1724     DstRegs.push_back(DstReg);
1725   }
1726 
1727   if (NarrowTy.isVector())
1728     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
1729   else
1730     MIRBuilder.buildBuildVector(DstReg, DstRegs);
1731 
1732   MI.eraseFromParent();
1733   return Legalized;
1734 }
1735 
1736 // Handle splitting vector operations which need to have the same number of
1737 // elements in each type index, but each type index may have a different element
1738 // type.
1739 //
1740 // e.g.  <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
1741 //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
1742 //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
1743 //
1744 // Also handles some irregular breakdown cases, e.g.
1745 // e.g.  <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
1746 //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
1747 //             s64 = G_SHL s64, s32
1748 LegalizerHelper::LegalizeResult
1749 LegalizerHelper::fewerElementsVectorMultiEltType(
1750   MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
1751   if (TypeIdx != 0)
1752     return UnableToLegalize;
1753 
1754   const LLT NarrowTy0 = NarrowTyArg;
1755   const unsigned NewNumElts =
1756       NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;
1757 
1758   const unsigned DstReg = MI.getOperand(0).getReg();
1759   LLT DstTy = MRI.getType(DstReg);
1760   LLT LeftoverTy0;
1761 
1762   // All of the operands need to have the same number of elements, so if we can
1763   // determine a type breakdown for the result type, we can for all of the
1764   // source types.
1765   int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0);
1766   if (NumParts < 0)
1767     return UnableToLegalize;
1768 
1769   SmallVector<MachineInstrBuilder, 4> NewInsts;
1770 
1771   SmallVector<unsigned, 4> DstRegs, LeftoverDstRegs;
1772   SmallVector<unsigned, 4> PartRegs, LeftoverRegs;
1773 
1774   for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
1775     LLT LeftoverTy;
1776     unsigned SrcReg = MI.getOperand(I).getReg();
1777     LLT SrcTyI = MRI.getType(SrcReg);
1778     LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
1779     LLT LeftoverTyI;
1780 
1781     // Split this operand into the requested typed registers, and any leftover
1782     // required to reproduce the original type.
1783     if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
1784                       LeftoverRegs))
1785       return UnableToLegalize;
1786 
1787     if (I == 1) {
1788       // For the first operand, create an instruction for each part and setup
1789       // the result.
1790       for (unsigned PartReg : PartRegs) {
1791         unsigned PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
1792         NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
1793                                .addDef(PartDstReg)
1794                                .addUse(PartReg));
1795         DstRegs.push_back(PartDstReg);
1796       }
1797 
1798       for (unsigned LeftoverReg : LeftoverRegs) {
1799         unsigned PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
1800         NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
1801                                .addDef(PartDstReg)
1802                                .addUse(LeftoverReg));
1803         LeftoverDstRegs.push_back(PartDstReg);
1804       }
1805     } else {
1806       assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());
1807 
1808       // Add the newly created operand splits to the existing instructions. The
1809       // odd-sized pieces are ordered after the requested NarrowTyArg sized
1810       // pieces.
1811       unsigned InstCount = 0;
1812       for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
1813         NewInsts[InstCount++].addUse(PartRegs[J]);
1814       for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
1815         NewInsts[InstCount++].addUse(LeftoverRegs[J]);
1816     }
1817 
1818     PartRegs.clear();
1819     LeftoverRegs.clear();
1820   }
1821 
1822   // Insert the newly built operations and rebuild the result register.
1823   for (auto &MIB : NewInsts)
1824     MIRBuilder.insertInstr(MIB);
1825 
1826   insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);
1827 
1828   MI.eraseFromParent();
1829   return Legalized;
1830 }
1831 
1832 LegalizerHelper::LegalizeResult
1833 LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
1834                                           LLT NarrowTy) {
1835   if (TypeIdx != 0)
1836     return UnableToLegalize;
1837 
1838   unsigned DstReg = MI.getOperand(0).getReg();
1839   unsigned SrcReg = MI.getOperand(1).getReg();
1840   LLT DstTy = MRI.getType(DstReg);
1841   LLT SrcTy = MRI.getType(SrcReg);
1842 
1843   LLT NarrowTy0 = NarrowTy;
1844   LLT NarrowTy1;
1845   unsigned NumParts;
1846 
1847   if (NarrowTy.isVector()) {
1848     // Uneven breakdown not handled.
1849     NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
1850     if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
1851       return UnableToLegalize;
1852 
1853     NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits());
1854   } else {
1855     NumParts = DstTy.getNumElements();
1856     NarrowTy1 = SrcTy.getElementType();
1857   }
1858 
1859   SmallVector<unsigned, 4> SrcRegs, DstRegs;
1860   extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
1861 
1862   for (unsigned I = 0; I < NumParts; ++I) {
1863     unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
1864     MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode())
1865       .addDef(DstReg)
1866       .addUse(SrcRegs[I]);
1867 
1868     NewInst->setFlags(MI.getFlags());
1869     DstRegs.push_back(DstReg);
1870   }
1871 
1872   if (NarrowTy.isVector())
1873     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
1874   else
1875     MIRBuilder.buildBuildVector(DstReg, DstRegs);
1876 
1877   MI.eraseFromParent();
1878   return Legalized;
1879 }
1880 
1881 LegalizerHelper::LegalizeResult
1882 LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
1883                                         LLT NarrowTy) {
1884   unsigned DstReg = MI.getOperand(0).getReg();
1885   unsigned Src0Reg = MI.getOperand(2).getReg();
1886   LLT DstTy = MRI.getType(DstReg);
1887   LLT SrcTy = MRI.getType(Src0Reg);
1888 
1889   unsigned NumParts;
1890   LLT NarrowTy0, NarrowTy1;
1891 
1892   if (TypeIdx == 0) {
1893     unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
1894     unsigned OldElts = DstTy.getNumElements();
1895 
1896     NarrowTy0 = NarrowTy;
1897     NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
1898     NarrowTy1 = NarrowTy.isVector() ?
1899       LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
1900       SrcTy.getElementType();
1901 
1902   } else {
1903     unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
1904     unsigned OldElts = SrcTy.getNumElements();
1905 
1906     NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
1907       NarrowTy.getNumElements();
1908     NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
1909                             DstTy.getScalarSizeInBits());
1910     NarrowTy1 = NarrowTy;
1911   }
1912 
1913   // FIXME: Don't know how to handle the situation where the small vectors
1914   // aren't all the same size yet.
1915   if (NarrowTy1.isVector() &&
1916       NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
1917     return UnableToLegalize;
1918 
1919   CmpInst::Predicate Pred
1920     = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1921 
1922   SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
1923   extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
1924   extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);
1925 
1926   for (unsigned I = 0; I < NumParts; ++I) {
1927     unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
1928     DstRegs.push_back(DstReg);
1929 
1930     if (MI.getOpcode() == TargetOpcode::G_ICMP)
1931       MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
1932     else {
1933       MachineInstr *NewCmp
1934         = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
1935       NewCmp->setFlags(MI.getFlags());
1936     }
1937   }
1938 
1939   if (NarrowTy1.isVector())
1940     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
1941   else
1942     MIRBuilder.buildBuildVector(DstReg, DstRegs);
1943 
1944   MI.eraseFromParent();
1945   return Legalized;
1946 }
1947 
1948 LegalizerHelper::LegalizeResult
1949 LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
1950                                            LLT NarrowTy) {
1951   unsigned DstReg = MI.getOperand(0).getReg();
1952   unsigned CondReg = MI.getOperand(1).getReg();
1953 
1954   unsigned NumParts = 0;
1955   LLT NarrowTy0, NarrowTy1;
1956 
1957   LLT DstTy = MRI.getType(DstReg);
1958   LLT CondTy = MRI.getType(CondReg);
1959   unsigned Size = DstTy.getSizeInBits();
1960 
1961   assert(TypeIdx == 0 || CondTy.isVector());
1962 
1963   if (TypeIdx == 0) {
1964     NarrowTy0 = NarrowTy;
1965     NarrowTy1 = CondTy;
1966 
1967     unsigned NarrowSize = NarrowTy0.getSizeInBits();
1968     // FIXME: Don't know how to handle the situation where the small vectors
1969     // aren't all the same size yet.
1970     if (Size % NarrowSize != 0)
1971       return UnableToLegalize;
1972 
1973     NumParts = Size / NarrowSize;
1974 
1975     // Need to break down the condition type
1976     if (CondTy.isVector()) {
1977       if (CondTy.getNumElements() == NumParts)
1978         NarrowTy1 = CondTy.getElementType();
1979       else
1980         NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
1981                                 CondTy.getScalarSizeInBits());
1982     }
1983   } else {
1984     NumParts = CondTy.getNumElements();
1985     if (NarrowTy.isVector()) {
1986       // TODO: Handle uneven breakdown.
1987       if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
1988         return UnableToLegalize;
1989 
1990       return UnableToLegalize;
1991     } else {
1992       NarrowTy0 = DstTy.getElementType();
1993       NarrowTy1 = NarrowTy;
1994     }
1995   }
1996 
1997   SmallVector<unsigned, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
1998   if (CondTy.isVector())
1999     extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
2000 
2001   extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
2002   extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
2003 
2004   for (unsigned i = 0; i < NumParts; ++i) {
2005     unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2006     MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
2007                            Src1Regs[i], Src2Regs[i]);
2008     DstRegs.push_back(DstReg);
2009   }
2010 
2011   if (NarrowTy0.isVector())
2012     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2013   else
2014     MIRBuilder.buildBuildVector(DstReg, DstRegs);
2015 
2016   MI.eraseFromParent();
2017   return Legalized;
2018 }
2019 
2020 LegalizerHelper::LegalizeResult
2021 LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
2022                                       LLT NarrowTy) {
2023   // FIXME: Don't know how to handle secondary types yet.
2024   if (TypeIdx != 0)
2025     return UnableToLegalize;
2026 
2027   MachineMemOperand *MMO = *MI.memoperands_begin();
2028 
2029   // This implementation doesn't work for atomics. Give up instead of doing
2030   // something invalid.
2031   if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
2032       MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
2033     return UnableToLegalize;
2034 
2035   bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
2036   unsigned ValReg = MI.getOperand(0).getReg();
2037   unsigned AddrReg = MI.getOperand(1).getReg();
2038   LLT ValTy = MRI.getType(ValReg);
2039 
2040   int NumParts = -1;
2041   LLT LeftoverTy;
2042   SmallVector<unsigned, 8> NarrowRegs, NarrowLeftoverRegs;
2043   if (IsLoad) {
2044     NumParts = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
2045   } else {
2046     if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
2047                      NarrowLeftoverRegs))
2048       NumParts = NarrowRegs.size();
2049   }
2050 
2051   if (NumParts == -1)
2052     return UnableToLegalize;
2053 
2054   const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
2055 
2056   unsigned TotalSize = ValTy.getSizeInBits();
2057 
2058   // Split the load/store into PartTy sized pieces starting at Offset. If this
2059   // is a load, return the new registers in ValRegs. For a store, each elements
2060   // of ValRegs should be PartTy. Returns the next offset that needs to be
2061   // handled.
2062   auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<unsigned> &ValRegs,
2063                              unsigned Offset) -> unsigned {
2064     MachineFunction &MF = MIRBuilder.getMF();
2065     unsigned PartSize = PartTy.getSizeInBits();
2066     for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
2067          Offset += PartSize, ++Idx) {
2068       unsigned ByteSize = PartSize / 8;
2069       unsigned ByteOffset = Offset / 8;
2070       unsigned NewAddrReg = 0;
2071 
2072       MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
2073 
2074       MachineMemOperand *NewMMO =
2075         MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);
2076 
2077       if (IsLoad) {
2078         unsigned Dst = MRI.createGenericVirtualRegister(PartTy);
2079         ValRegs.push_back(Dst);
2080         MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
2081       } else {
2082         MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
2083       }
2084     }
2085 
2086     return Offset;
2087   };
2088 
2089   unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);
2090 
2091   // Handle the rest of the register if this isn't an even type breakdown.
2092   if (LeftoverTy.isValid())
2093     splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);
2094 
2095   if (IsLoad) {
2096     insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
2097                 LeftoverTy, NarrowLeftoverRegs);
2098   }
2099 
2100   MI.eraseFromParent();
2101   return Legalized;
2102 }
2103 
2104 LegalizerHelper::LegalizeResult
2105 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
2106                                      LLT NarrowTy) {
2107   using namespace TargetOpcode;
2108 
2109   MIRBuilder.setInstr(MI);
2110   switch (MI.getOpcode()) {
2111   case G_IMPLICIT_DEF:
2112     return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
2113   case G_AND:
2114   case G_OR:
2115   case G_XOR:
2116   case G_ADD:
2117   case G_SUB:
2118   case G_MUL:
2119   case G_SMULH:
2120   case G_UMULH:
2121   case G_FADD:
2122   case G_FMUL:
2123   case G_FSUB:
2124   case G_FNEG:
2125   case G_FABS:
2126   case G_FCANONICALIZE:
2127   case G_FDIV:
2128   case G_FREM:
2129   case G_FMA:
2130   case G_FPOW:
2131   case G_FEXP:
2132   case G_FEXP2:
2133   case G_FLOG:
2134   case G_FLOG2:
2135   case G_FLOG10:
2136   case G_FCEIL:
2137   case G_FFLOOR:
2138   case G_INTRINSIC_ROUND:
2139   case G_INTRINSIC_TRUNC:
2140   case G_FCOS:
2141   case G_FSIN:
2142   case G_FSQRT:
2143   case G_BSWAP:
2144     return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
2145   case G_SHL:
2146   case G_LSHR:
2147   case G_ASHR:
2148   case G_CTLZ:
2149   case G_CTLZ_ZERO_UNDEF:
2150   case G_CTTZ:
2151   case G_CTTZ_ZERO_UNDEF:
2152   case G_CTPOP:
2153     return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
2154   case G_ZEXT:
2155   case G_SEXT:
2156   case G_ANYEXT:
2157   case G_FPEXT:
2158   case G_FPTRUNC:
2159   case G_SITOFP:
2160   case G_UITOFP:
2161   case G_FPTOSI:
2162   case G_FPTOUI:
2163   case G_INTTOPTR:
2164   case G_PTRTOINT:
2165   case G_ADDRSPACE_CAST:
2166     return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
2167   case G_ICMP:
2168   case G_FCMP:
2169     return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
2170   case G_SELECT:
2171     return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
2172   case G_LOAD:
2173   case G_STORE:
2174     return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
2175   default:
2176     return UnableToLegalize;
2177   }
2178 }
2179 
2180 LegalizerHelper::LegalizeResult
2181 LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
2182                                              const LLT HalfTy, const LLT AmtTy) {
2183 
2184   unsigned InL = MRI.createGenericVirtualRegister(HalfTy);
2185   unsigned InH = MRI.createGenericVirtualRegister(HalfTy);
2186   MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());
2187 
2188   if (Amt.isNullValue()) {
2189     MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {InL, InH});
2190     MI.eraseFromParent();
2191     return Legalized;
2192   }
2193 
2194   LLT NVT = HalfTy;
2195   unsigned NVTBits = HalfTy.getSizeInBits();
2196   unsigned VTBits = 2 * NVTBits;
2197 
2198   SrcOp Lo(0), Hi(0);
2199   if (MI.getOpcode() == TargetOpcode::G_SHL) {
2200     if (Amt.ugt(VTBits)) {
2201       Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
2202     } else if (Amt.ugt(NVTBits)) {
2203       Lo = MIRBuilder.buildConstant(NVT, 0);
2204       Hi = MIRBuilder.buildShl(NVT, InL,
2205                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
2206     } else if (Amt == NVTBits) {
2207       Lo = MIRBuilder.buildConstant(NVT, 0);
2208       Hi = InL;
2209     } else {
2210       Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
2211       auto OrLHS =
2212           MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
2213       auto OrRHS = MIRBuilder.buildLShr(
2214           NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
2215       Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
2216     }
2217   } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2218     if (Amt.ugt(VTBits)) {
2219       Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
2220     } else if (Amt.ugt(NVTBits)) {
2221       Lo = MIRBuilder.buildLShr(NVT, InH,
2222                                 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
2223       Hi = MIRBuilder.buildConstant(NVT, 0);
2224     } else if (Amt == NVTBits) {
2225       Lo = InH;
2226       Hi = MIRBuilder.buildConstant(NVT, 0);
2227     } else {
2228       auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
2229 
2230       auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
2231       auto OrRHS = MIRBuilder.buildShl(
2232           NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
2233 
2234       Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
2235       Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
2236     }
2237   } else {
2238     if (Amt.ugt(VTBits)) {
2239       Hi = Lo = MIRBuilder.buildAShr(
2240           NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
2241     } else if (Amt.ugt(NVTBits)) {
2242       Lo = MIRBuilder.buildAShr(NVT, InH,
2243                                 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
2244       Hi = MIRBuilder.buildAShr(NVT, InH,
2245                                 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
2246     } else if (Amt == NVTBits) {
2247       Lo = InH;
2248       Hi = MIRBuilder.buildAShr(NVT, InH,
2249                                 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
2250     } else {
2251       auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
2252 
2253       auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
2254       auto OrRHS = MIRBuilder.buildShl(
2255           NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
2256 
2257       Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
2258       Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
2259     }
2260   }
2261 
2262   MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {Lo.getReg(), Hi.getReg()});
2263   MI.eraseFromParent();
2264 
2265   return Legalized;
2266 }
2267 
2268 // TODO: Optimize if constant shift amount.
2269 LegalizerHelper::LegalizeResult
2270 LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
2271                                    LLT RequestedTy) {
2272   if (TypeIdx == 1) {
2273     Observer.changingInstr(MI);
2274     narrowScalarSrc(MI, RequestedTy, 2);
2275     Observer.changedInstr(MI);
2276     return Legalized;
2277   }
2278 
2279   unsigned DstReg = MI.getOperand(0).getReg();
2280   LLT DstTy = MRI.getType(DstReg);
2281   if (DstTy.isVector())
2282     return UnableToLegalize;
2283 
2284   unsigned Amt = MI.getOperand(2).getReg();
2285   LLT ShiftAmtTy = MRI.getType(Amt);
2286   const unsigned DstEltSize = DstTy.getScalarSizeInBits();
2287   if (DstEltSize % 2 != 0)
2288     return UnableToLegalize;
2289 
2290   // Ignore the input type. We can only go to exactly half the size of the
2291   // input. If that isn't small enough, the resulting pieces will be further
2292   // legalized.
2293   const unsigned NewBitSize = DstEltSize / 2;
2294   const LLT HalfTy = LLT::scalar(NewBitSize);
2295   const LLT CondTy = LLT::scalar(1);
2296 
2297   if (const MachineInstr *KShiftAmt =
2298           getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
2299     return narrowScalarShiftByConstant(
2300         MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
2301   }
2302 
2303   // TODO: Expand with known bits.
2304 
2305   // Handle the fully general expansion by an unknown amount.
2306   auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
2307 
2308   unsigned InL = MRI.createGenericVirtualRegister(HalfTy);
2309   unsigned InH = MRI.createGenericVirtualRegister(HalfTy);
2310   MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());
2311 
2312   auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
2313   auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
2314 
2315   auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
2316   auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
2317   auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
2318 
2319   unsigned ResultRegs[2];
2320   switch (MI.getOpcode()) {
2321   case TargetOpcode::G_SHL: {
2322     // Short: ShAmt < NewBitSize
2323     auto LoS = MIRBuilder.buildShl(HalfTy, InH, Amt);
2324 
2325     auto OrLHS = MIRBuilder.buildShl(HalfTy, InH, Amt);
2326     auto OrRHS = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
2327     auto HiS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
2328 
2329     // Long: ShAmt >= NewBitSize
2330     auto LoL = MIRBuilder.buildConstant(HalfTy, 0);         // Lo part is zero.
2331     auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
2332 
2333     auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
2334     auto Hi = MIRBuilder.buildSelect(
2335         HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
2336 
2337     ResultRegs[0] = Lo.getReg(0);
2338     ResultRegs[1] = Hi.getReg(0);
2339     break;
2340   }
2341   case TargetOpcode::G_LSHR: {
2342     // Short: ShAmt < NewBitSize
2343     auto HiS = MIRBuilder.buildLShr(HalfTy, InH, Amt);
2344 
2345     auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
2346     auto OrRHS = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
2347     auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
2348 
2349     // Long: ShAmt >= NewBitSize
2350     auto HiL = MIRBuilder.buildConstant(HalfTy, 0);          // Hi part is zero.
2351     auto LoL = MIRBuilder.buildLShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
2352 
2353     auto Lo = MIRBuilder.buildSelect(
2354         HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
2355     auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
2356 
2357     ResultRegs[0] = Lo.getReg(0);
2358     ResultRegs[1] = Hi.getReg(0);
2359     break;
2360   }
2361   case TargetOpcode::G_ASHR: {
2362     // Short: ShAmt < NewBitSize
2363     auto HiS = MIRBuilder.buildAShr(HalfTy, InH, Amt);
2364 
2365     auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
2366     auto OrRHS = MIRBuilder.buildLShr(HalfTy, InH, AmtLack);
2367     auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
2368 
2369     // Long: ShAmt >= NewBitSize
2370 
2371     // Sign of Hi part.
2372     auto HiL = MIRBuilder.buildAShr(
2373         HalfTy, InH, MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1));
2374 
2375     auto LoL = MIRBuilder.buildAShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
2376 
2377     auto Lo = MIRBuilder.buildSelect(
2378         HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
2379 
2380     auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
2381 
2382     ResultRegs[0] = Lo.getReg(0);
2383     ResultRegs[1] = Hi.getReg(0);
2384     break;
2385   }
2386   default:
2387     llvm_unreachable("not a shift");
2388   }
2389 
2390   MIRBuilder.buildMerge(DstReg, ResultRegs);
2391   MI.eraseFromParent();
2392   return Legalized;
2393 }
2394 
2395 LegalizerHelper::LegalizeResult
2396 LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
2397                                     LLT MoreTy) {
2398   MIRBuilder.setInstr(MI);
2399   unsigned Opc = MI.getOpcode();
2400   switch (Opc) {
2401   case TargetOpcode::G_IMPLICIT_DEF: {
2402     Observer.changingInstr(MI);
2403     moreElementsVectorDst(MI, MoreTy, 0);
2404     Observer.changedInstr(MI);
2405     return Legalized;
2406   }
2407   case TargetOpcode::G_AND:
2408   case TargetOpcode::G_OR:
2409   case TargetOpcode::G_XOR: {
2410     Observer.changingInstr(MI);
2411     moreElementsVectorSrc(MI, MoreTy, 1);
2412     moreElementsVectorSrc(MI, MoreTy, 2);
2413     moreElementsVectorDst(MI, MoreTy, 0);
2414     Observer.changedInstr(MI);
2415     return Legalized;
2416   }
2417   case TargetOpcode::G_EXTRACT:
2418     if (TypeIdx != 1)
2419       return UnableToLegalize;
2420     Observer.changingInstr(MI);
2421     moreElementsVectorSrc(MI, MoreTy, 1);
2422     Observer.changedInstr(MI);
2423     return Legalized;
2424   case TargetOpcode::G_INSERT:
2425     if (TypeIdx != 0)
2426       return UnableToLegalize;
2427     Observer.changingInstr(MI);
2428     moreElementsVectorSrc(MI, MoreTy, 1);
2429     moreElementsVectorDst(MI, MoreTy, 0);
2430     Observer.changedInstr(MI);
2431     return Legalized;
2432   case TargetOpcode::G_SELECT:
2433     if (TypeIdx != 0)
2434       return UnableToLegalize;
2435     if (MRI.getType(MI.getOperand(1).getReg()).isVector())
2436       return UnableToLegalize;
2437 
2438     Observer.changingInstr(MI);
2439     moreElementsVectorSrc(MI, MoreTy, 2);
2440     moreElementsVectorSrc(MI, MoreTy, 3);
2441     moreElementsVectorDst(MI, MoreTy, 0);
2442     Observer.changedInstr(MI);
2443     return Legalized;
2444   default:
2445     return UnableToLegalize;
2446   }
2447 }
2448 
2449 LegalizerHelper::LegalizeResult
2450 LegalizerHelper::narrowScalarMul(MachineInstr &MI, unsigned TypeIdx, LLT NewTy) {
2451   unsigned DstReg = MI.getOperand(0).getReg();
2452   unsigned Src0 = MI.getOperand(1).getReg();
2453   unsigned Src1 = MI.getOperand(2).getReg();
2454   LLT Ty = MRI.getType(DstReg);
2455   if (Ty.isVector())
2456     return UnableToLegalize;
2457 
2458   unsigned Size = Ty.getSizeInBits();
2459   unsigned NewSize = Size / 2;
2460   if (Size != 2 * NewSize)
2461     return UnableToLegalize;
2462 
2463   LLT HalfTy = LLT::scalar(NewSize);
2464   // TODO: if HalfTy != NewTy, handle the breakdown all at once?
2465 
2466   unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty);
2467   unsigned Lo = MRI.createGenericVirtualRegister(HalfTy);
2468   unsigned Hi = MRI.createGenericVirtualRegister(HalfTy);
2469   unsigned ExtLo = MRI.createGenericVirtualRegister(Ty);
2470   unsigned ExtHi = MRI.createGenericVirtualRegister(Ty);
2471   unsigned ShiftedHi = MRI.createGenericVirtualRegister(Ty);
2472 
2473   SmallVector<unsigned, 2> Src0Parts;
2474   SmallVector<unsigned, 2> Src1Parts;
2475 
2476   extractParts(Src0, HalfTy, 2, Src0Parts);
2477   extractParts(Src1, HalfTy, 2, Src1Parts);
2478 
2479   MIRBuilder.buildMul(Lo, Src0Parts[0], Src1Parts[0]);
2480 
2481   // TODO: Use smulh or umulh depending on what the target has.
2482   MIRBuilder.buildUMulH(Hi, Src0Parts[1], Src1Parts[1]);
2483 
2484   MIRBuilder.buildConstant(ShiftAmt, NewSize);
2485   MIRBuilder.buildAnyExt(ExtHi, Hi);
2486   MIRBuilder.buildShl(ShiftedHi, ExtHi, ShiftAmt);
2487 
2488   MIRBuilder.buildZExt(ExtLo, Lo);
2489   MIRBuilder.buildOr(DstReg, ExtLo, ShiftedHi);
2490   MI.eraseFromParent();
2491   return Legalized;
2492 }
2493 
2494 
2495 LegalizerHelper::LegalizeResult
2496 LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2497                                      LLT NarrowTy) {
2498   if (TypeIdx != 1)
2499     return UnableToLegalize;
2500 
2501   uint64_t NarrowSize = NarrowTy.getSizeInBits();
2502 
2503   int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
2504   // FIXME: add support for when SizeOp1 isn't an exact multiple of
2505   // NarrowSize.
2506   if (SizeOp1 % NarrowSize != 0)
2507     return UnableToLegalize;
2508   int NumParts = SizeOp1 / NarrowSize;
2509 
2510   SmallVector<unsigned, 2> SrcRegs, DstRegs;
2511   SmallVector<uint64_t, 2> Indexes;
2512   extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
2513 
2514   unsigned OpReg = MI.getOperand(0).getReg();
2515   uint64_t OpStart = MI.getOperand(2).getImm();
2516   uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
2517   for (int i = 0; i < NumParts; ++i) {
2518     unsigned SrcStart = i * NarrowSize;
2519 
2520     if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
2521       // No part of the extract uses this subregister, ignore it.
2522       continue;
2523     } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
2524       // The entire subregister is extracted, forward the value.
2525       DstRegs.push_back(SrcRegs[i]);
2526       continue;
2527     }
2528 
2529     // OpSegStart is where this destination segment would start in OpReg if it
2530     // extended infinitely in both directions.
2531     int64_t ExtractOffset;
2532     uint64_t SegSize;
2533     if (OpStart < SrcStart) {
2534       ExtractOffset = 0;
2535       SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
2536     } else {
2537       ExtractOffset = OpStart - SrcStart;
2538       SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
2539     }
2540 
2541     unsigned SegReg = SrcRegs[i];
2542     if (ExtractOffset != 0 || SegSize != NarrowSize) {
2543       // A genuine extract is needed.
2544       SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
2545       MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
2546     }
2547 
2548     DstRegs.push_back(SegReg);
2549   }
2550 
2551   unsigned DstReg = MI.getOperand(0).getReg();
2552   if(MRI.getType(DstReg).isVector())
2553     MIRBuilder.buildBuildVector(DstReg, DstRegs);
2554   else
2555     MIRBuilder.buildMerge(DstReg, DstRegs);
2556   MI.eraseFromParent();
2557   return Legalized;
2558 }
2559 
2560 LegalizerHelper::LegalizeResult
2561 LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2562                                     LLT NarrowTy) {
2563   // FIXME: Don't know how to handle secondary types yet.
2564   if (TypeIdx != 0)
2565     return UnableToLegalize;
2566 
2567   uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2568   uint64_t NarrowSize = NarrowTy.getSizeInBits();
2569 
2570   // FIXME: add support for when SizeOp0 isn't an exact multiple of
2571   // NarrowSize.
2572   if (SizeOp0 % NarrowSize != 0)
2573     return UnableToLegalize;
2574 
2575   int NumParts = SizeOp0 / NarrowSize;
2576 
2577   SmallVector<unsigned, 2> SrcRegs, DstRegs;
2578   SmallVector<uint64_t, 2> Indexes;
2579   extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
2580 
2581   unsigned OpReg = MI.getOperand(2).getReg();
2582   uint64_t OpStart = MI.getOperand(3).getImm();
2583   uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
2584   for (int i = 0; i < NumParts; ++i) {
2585     unsigned DstStart = i * NarrowSize;
2586 
2587     if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
2588       // No part of the insert affects this subregister, forward the original.
2589       DstRegs.push_back(SrcRegs[i]);
2590       continue;
2591     } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
2592       // The entire subregister is defined by this insert, forward the new
2593       // value.
2594       DstRegs.push_back(OpReg);
2595       continue;
2596     }
2597 
2598     // OpSegStart is where this destination segment would start in OpReg if it
2599     // extended infinitely in both directions.
2600     int64_t ExtractOffset, InsertOffset;
2601     uint64_t SegSize;
2602     if (OpStart < DstStart) {
2603       InsertOffset = 0;
2604       ExtractOffset = DstStart - OpStart;
2605       SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
2606     } else {
2607       InsertOffset = OpStart - DstStart;
2608       ExtractOffset = 0;
2609       SegSize =
2610         std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
2611     }
2612 
2613     unsigned SegReg = OpReg;
2614     if (ExtractOffset != 0 || SegSize != OpSize) {
2615       // A genuine extract is needed.
2616       SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
2617       MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
2618     }
2619 
2620     unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
2621     MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
2622     DstRegs.push_back(DstReg);
2623   }
2624 
2625   assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
2626   unsigned DstReg = MI.getOperand(0).getReg();
2627   if(MRI.getType(DstReg).isVector())
2628     MIRBuilder.buildBuildVector(DstReg, DstRegs);
2629   else
2630     MIRBuilder.buildMerge(DstReg, DstRegs);
2631   MI.eraseFromParent();
2632   return Legalized;
2633 }
2634 
2635 LegalizerHelper::LegalizeResult
2636 LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
2637                                     LLT NarrowTy) {
2638   if (TypeIdx != 0)
2639     return UnableToLegalize;
2640 
2641   unsigned CondReg = MI.getOperand(1).getReg();
2642   LLT CondTy = MRI.getType(CondReg);
2643   if (CondTy.isVector()) // TODO: Handle vselect
2644     return UnableToLegalize;
2645 
2646   unsigned DstReg = MI.getOperand(0).getReg();
2647   LLT DstTy = MRI.getType(DstReg);
2648 
2649   SmallVector<unsigned, 4> DstRegs, DstLeftoverRegs;
2650   SmallVector<unsigned, 4> Src1Regs, Src1LeftoverRegs;
2651   SmallVector<unsigned, 4> Src2Regs, Src2LeftoverRegs;
2652   LLT LeftoverTy;
2653   if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
2654                     Src1Regs, Src1LeftoverRegs))
2655     return UnableToLegalize;
2656 
2657   LLT Unused;
2658   if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
2659                     Src2Regs, Src2LeftoverRegs))
2660     llvm_unreachable("inconsistent extractParts result");
2661 
2662   for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
2663     auto Select = MIRBuilder.buildSelect(NarrowTy,
2664                                          CondReg, Src1Regs[I], Src2Regs[I]);
2665     DstRegs.push_back(Select->getOperand(0).getReg());
2666   }
2667 
2668   for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
2669     auto Select = MIRBuilder.buildSelect(
2670       LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
2671     DstLeftoverRegs.push_back(Select->getOperand(0).getReg());
2672   }
2673 
2674   insertParts(DstReg, DstTy, NarrowTy, DstRegs,
2675               LeftoverTy, DstLeftoverRegs);
2676 
2677   MI.eraseFromParent();
2678   return Legalized;
2679 }
2680 
2681 LegalizerHelper::LegalizeResult
2682 LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
2683   unsigned Opc = MI.getOpcode();
2684   auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
2685   auto isSupported = [this](const LegalityQuery &Q) {
2686     auto QAction = LI.getAction(Q).Action;
2687     return QAction == Legal || QAction == Libcall || QAction == Custom;
2688   };
2689   switch (Opc) {
2690   default:
2691     return UnableToLegalize;
2692   case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
2693     // This trivially expands to CTLZ.
2694     Observer.changingInstr(MI);
2695     MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
2696     Observer.changedInstr(MI);
2697     return Legalized;
2698   }
2699   case TargetOpcode::G_CTLZ: {
2700     unsigned SrcReg = MI.getOperand(1).getReg();
2701     unsigned Len = Ty.getSizeInBits();
2702     if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) {
2703       // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
2704       auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
2705                                              {Ty}, {SrcReg});
2706       auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
2707       auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
2708       auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
2709                                           SrcReg, MIBZero);
2710       MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
2711                              MIBCtlzZU);
2712       MI.eraseFromParent();
2713       return Legalized;
2714     }
2715     // for now, we do this:
2716     // NewLen = NextPowerOf2(Len);
2717     // x = x | (x >> 1);
2718     // x = x | (x >> 2);
2719     // ...
2720     // x = x | (x >>16);
2721     // x = x | (x >>32); // for 64-bit input
2722     // Upto NewLen/2
2723     // return Len - popcount(x);
2724     //
2725     // Ref: "Hacker's Delight" by Henry Warren
2726     unsigned Op = SrcReg;
2727     unsigned NewLen = PowerOf2Ceil(Len);
2728     for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
2729       auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
2730       auto MIBOp = MIRBuilder.buildInstr(
2731           TargetOpcode::G_OR, {Ty},
2732           {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty},
2733                                      {Op, MIBShiftAmt})});
2734       Op = MIBOp->getOperand(0).getReg();
2735     }
2736     auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op});
2737     MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
2738                           {MIRBuilder.buildConstant(Ty, Len), MIBPop});
2739     MI.eraseFromParent();
2740     return Legalized;
2741   }
2742   case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
2743     // This trivially expands to CTTZ.
2744     Observer.changingInstr(MI);
2745     MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
2746     Observer.changedInstr(MI);
2747     return Legalized;
2748   }
2749   case TargetOpcode::G_CTTZ: {
2750     unsigned SrcReg = MI.getOperand(1).getReg();
2751     unsigned Len = Ty.getSizeInBits();
2752     if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) {
2753       // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
2754       // zero.
2755       auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
2756                                              {Ty}, {SrcReg});
2757       auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
2758       auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
2759       auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
2760                                           SrcReg, MIBZero);
2761       MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
2762                              MIBCttzZU);
2763       MI.eraseFromParent();
2764       return Legalized;
2765     }
2766     // for now, we use: { return popcount(~x & (x - 1)); }
2767     // unless the target has ctlz but not ctpop, in which case we use:
2768     // { return 32 - nlz(~x & (x-1)); }
2769     // Ref: "Hacker's Delight" by Henry Warren
2770     auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
2771     auto MIBNot =
2772         MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1});
2773     auto MIBTmp = MIRBuilder.buildInstr(
2774         TargetOpcode::G_AND, {Ty},
2775         {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
2776                                        {SrcReg, MIBCstNeg1})});
2777     if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) &&
2778         isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) {
2779       auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
2780       MIRBuilder.buildInstr(
2781           TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
2782           {MIBCstLen,
2783            MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})});
2784       MI.eraseFromParent();
2785       return Legalized;
2786     }
2787     MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
2788     MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
2789     return Legalized;
2790   }
2791   }
2792 }
2793