1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file implements the LegalizerHelper class to legalize
10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
11 /// primary legalization.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
16 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
17 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
18 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/CodeGen/TargetInstrInfo.h"
21 #include "llvm/CodeGen/TargetLowering.h"
22 #include "llvm/CodeGen/TargetSubtargetInfo.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
26 
27 #define DEBUG_TYPE "legalizer"
28 
29 using namespace llvm;
30 using namespace LegalizeActions;
31 
32 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
33 ///
34 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
35 /// with any leftover piece as type \p LeftoverTy
36 ///
37 /// Returns -1 if the breakdown is not satisfiable.
38 static int getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
39   assert(!LeftoverTy.isValid() && "this is an out argument");
40 
41   unsigned Size = OrigTy.getSizeInBits();
42   unsigned NarrowSize = NarrowTy.getSizeInBits();
43   unsigned NumParts = Size / NarrowSize;
44   unsigned LeftoverSize = Size - NumParts * NarrowSize;
45   assert(Size > NarrowSize);
46 
47   if (LeftoverSize == 0)
48     return NumParts;
49 
50   if (NarrowTy.isVector()) {
51     unsigned EltSize = OrigTy.getScalarSizeInBits();
52     if (LeftoverSize % EltSize != 0)
53       return -1;
54     LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
55   } else {
56     LeftoverTy = LLT::scalar(LeftoverSize);
57   }
58 
59   return NumParts;
60 }
61 
62 LegalizerHelper::LegalizerHelper(MachineFunction &MF,
63                                  GISelChangeObserver &Observer,
64                                  MachineIRBuilder &Builder)
65     : MIRBuilder(Builder), MRI(MF.getRegInfo()),
66       LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
67   MIRBuilder.setMF(MF);
68   MIRBuilder.setChangeObserver(Observer);
69 }
70 
71 LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
72                                  GISelChangeObserver &Observer,
73                                  MachineIRBuilder &B)
74     : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
75   MIRBuilder.setMF(MF);
76   MIRBuilder.setChangeObserver(Observer);
77 }
78 LegalizerHelper::LegalizeResult
79 LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
80   LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));
81 
82   auto Step = LI.getAction(MI, MRI);
83   switch (Step.Action) {
84   case Legal:
85     LLVM_DEBUG(dbgs() << ".. Already legal\n");
86     return AlreadyLegal;
87   case Libcall:
88     LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
89     return libcall(MI);
90   case NarrowScalar:
91     LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
92     return narrowScalar(MI, Step.TypeIdx, Step.NewType);
93   case WidenScalar:
94     LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
95     return widenScalar(MI, Step.TypeIdx, Step.NewType);
96   case Lower:
97     LLVM_DEBUG(dbgs() << ".. Lower\n");
98     return lower(MI, Step.TypeIdx, Step.NewType);
99   case FewerElements:
100     LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
101     return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
102   case MoreElements:
103     LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
104     return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
105   case Custom:
106     LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
107     return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
108                                                             : UnableToLegalize;
109   default:
110     LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
111     return UnableToLegalize;
112   }
113 }
114 
115 void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts,
116                                    SmallVectorImpl<unsigned> &VRegs) {
117   for (int i = 0; i < NumParts; ++i)
118     VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
119   MIRBuilder.buildUnmerge(VRegs, Reg);
120 }
121 
122 bool LegalizerHelper::extractParts(unsigned Reg, LLT RegTy,
123                                    LLT MainTy, LLT &LeftoverTy,
124                                    SmallVectorImpl<unsigned> &VRegs,
125                                    SmallVectorImpl<unsigned> &LeftoverRegs) {
126   assert(!LeftoverTy.isValid() && "this is an out argument");
127 
128   unsigned RegSize = RegTy.getSizeInBits();
129   unsigned MainSize = MainTy.getSizeInBits();
130   unsigned NumParts = RegSize / MainSize;
131   unsigned LeftoverSize = RegSize - NumParts * MainSize;
132 
133   // Use an unmerge when possible.
134   if (LeftoverSize == 0) {
135     for (unsigned I = 0; I < NumParts; ++I)
136       VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
137     MIRBuilder.buildUnmerge(VRegs, Reg);
138     return true;
139   }
140 
141   if (MainTy.isVector()) {
142     unsigned EltSize = MainTy.getScalarSizeInBits();
143     if (LeftoverSize % EltSize != 0)
144       return false;
145     LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
146   } else {
147     LeftoverTy = LLT::scalar(LeftoverSize);
148   }
149 
150   // For irregular sizes, extract the individual parts.
151   for (unsigned I = 0; I != NumParts; ++I) {
152     unsigned NewReg = MRI.createGenericVirtualRegister(MainTy);
153     VRegs.push_back(NewReg);
154     MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
155   }
156 
157   for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
158        Offset += LeftoverSize) {
159     unsigned NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
160     LeftoverRegs.push_back(NewReg);
161     MIRBuilder.buildExtract(NewReg, Reg, Offset);
162   }
163 
164   return true;
165 }
166 
167 void LegalizerHelper::insertParts(unsigned DstReg,
168                                   LLT ResultTy, LLT PartTy,
169                                   ArrayRef<unsigned> PartRegs,
170                                   LLT LeftoverTy,
171                                   ArrayRef<unsigned> LeftoverRegs) {
172   if (!LeftoverTy.isValid()) {
173     assert(LeftoverRegs.empty());
174 
175     if (!ResultTy.isVector()) {
176       MIRBuilder.buildMerge(DstReg, PartRegs);
177       return;
178     }
179 
180     if (PartTy.isVector())
181       MIRBuilder.buildConcatVectors(DstReg, PartRegs);
182     else
183       MIRBuilder.buildBuildVector(DstReg, PartRegs);
184     return;
185   }
186 
187   unsigned PartSize = PartTy.getSizeInBits();
188   unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();
189 
190   unsigned CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
191   MIRBuilder.buildUndef(CurResultReg);
192 
193   unsigned Offset = 0;
194   for (unsigned PartReg : PartRegs) {
195     unsigned NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
196     MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
197     CurResultReg = NewResultReg;
198     Offset += PartSize;
199   }
200 
201   for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
202     // Use the original output register for the final insert to avoid a copy.
203     unsigned NewResultReg = (I + 1 == E) ?
204       DstReg : MRI.createGenericVirtualRegister(ResultTy);
205 
206     MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
207     CurResultReg = NewResultReg;
208     Offset += LeftoverPartSize;
209   }
210 }
211 
212 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
213   switch (Opcode) {
214   case TargetOpcode::G_SDIV:
215     assert((Size == 32 || Size == 64) && "Unsupported size");
216     return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32;
217   case TargetOpcode::G_UDIV:
218     assert((Size == 32 || Size == 64) && "Unsupported size");
219     return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32;
220   case TargetOpcode::G_SREM:
221     assert((Size == 32 || Size == 64) && "Unsupported size");
222     return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
223   case TargetOpcode::G_UREM:
224     assert((Size == 32 || Size == 64) && "Unsupported size");
225     return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32;
226   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
227     assert(Size == 32 && "Unsupported size");
228     return RTLIB::CTLZ_I32;
229   case TargetOpcode::G_FADD:
230     assert((Size == 32 || Size == 64) && "Unsupported size");
231     return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
232   case TargetOpcode::G_FSUB:
233     assert((Size == 32 || Size == 64) && "Unsupported size");
234     return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
235   case TargetOpcode::G_FMUL:
236     assert((Size == 32 || Size == 64) && "Unsupported size");
237     return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
238   case TargetOpcode::G_FDIV:
239     assert((Size == 32 || Size == 64) && "Unsupported size");
240     return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
241   case TargetOpcode::G_FEXP:
242     assert((Size == 32 || Size == 64) && "Unsupported size");
243     return Size == 64 ? RTLIB::EXP_F64 : RTLIB::EXP_F32;
244   case TargetOpcode::G_FREM:
245     return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
246   case TargetOpcode::G_FPOW:
247     return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
248   case TargetOpcode::G_FMA:
249     assert((Size == 32 || Size == 64) && "Unsupported size");
250     return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
251   case TargetOpcode::G_FSIN:
252     assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
253     return Size == 128 ? RTLIB::SIN_F128
254                        : Size == 64 ? RTLIB::SIN_F64 : RTLIB::SIN_F32;
255   case TargetOpcode::G_FCOS:
256     assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
257     return Size == 128 ? RTLIB::COS_F128
258                        : Size == 64 ? RTLIB::COS_F64 : RTLIB::COS_F32;
259   case TargetOpcode::G_FLOG10:
260     assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
261     return Size == 128 ? RTLIB::LOG10_F128
262                        : Size == 64 ? RTLIB::LOG10_F64 : RTLIB::LOG10_F32;
263   case TargetOpcode::G_FLOG:
264     assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
265     return Size == 128 ? RTLIB::LOG_F128
266                        : Size == 64 ? RTLIB::LOG_F64 : RTLIB::LOG_F32;
267   case TargetOpcode::G_FLOG2:
268     assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
269     return Size == 128 ? RTLIB::LOG2_F128
270                        : Size == 64 ? RTLIB::LOG2_F64 : RTLIB::LOG2_F32;
271   }
272   llvm_unreachable("Unknown libcall function");
273 }
274 
275 LegalizerHelper::LegalizeResult
276 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
277                     const CallLowering::ArgInfo &Result,
278                     ArrayRef<CallLowering::ArgInfo> Args) {
279   auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
280   auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
281   const char *Name = TLI.getLibcallName(Libcall);
282 
283   MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
284   if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall),
285                      MachineOperand::CreateES(Name), Result, Args))
286     return LegalizerHelper::UnableToLegalize;
287 
288   return LegalizerHelper::Legalized;
289 }
290 
291 // Useful for libcalls where all operands have the same type.
292 static LegalizerHelper::LegalizeResult
293 simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
294               Type *OpType) {
295   auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
296 
297   SmallVector<CallLowering::ArgInfo, 3> Args;
298   for (unsigned i = 1; i < MI.getNumOperands(); i++)
299     Args.push_back({MI.getOperand(i).getReg(), OpType});
300   return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
301                        Args);
302 }
303 
304 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
305                                        Type *FromType) {
306   auto ToMVT = MVT::getVT(ToType);
307   auto FromMVT = MVT::getVT(FromType);
308 
309   switch (Opcode) {
310   case TargetOpcode::G_FPEXT:
311     return RTLIB::getFPEXT(FromMVT, ToMVT);
312   case TargetOpcode::G_FPTRUNC:
313     return RTLIB::getFPROUND(FromMVT, ToMVT);
314   case TargetOpcode::G_FPTOSI:
315     return RTLIB::getFPTOSINT(FromMVT, ToMVT);
316   case TargetOpcode::G_FPTOUI:
317     return RTLIB::getFPTOUINT(FromMVT, ToMVT);
318   case TargetOpcode::G_SITOFP:
319     return RTLIB::getSINTTOFP(FromMVT, ToMVT);
320   case TargetOpcode::G_UITOFP:
321     return RTLIB::getUINTTOFP(FromMVT, ToMVT);
322   }
323   llvm_unreachable("Unsupported libcall function");
324 }
325 
326 static LegalizerHelper::LegalizeResult
327 conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
328                   Type *FromType) {
329   RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
330   return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
331                        {{MI.getOperand(1).getReg(), FromType}});
332 }
333 
334 LegalizerHelper::LegalizeResult
335 LegalizerHelper::libcall(MachineInstr &MI) {
336   LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
337   unsigned Size = LLTy.getSizeInBits();
338   auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
339 
340   MIRBuilder.setInstr(MI);
341 
342   switch (MI.getOpcode()) {
343   default:
344     return UnableToLegalize;
345   case TargetOpcode::G_SDIV:
346   case TargetOpcode::G_UDIV:
347   case TargetOpcode::G_SREM:
348   case TargetOpcode::G_UREM:
349   case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
350     Type *HLTy = IntegerType::get(Ctx, Size);
351     auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
352     if (Status != Legalized)
353       return Status;
354     break;
355   }
356   case TargetOpcode::G_FADD:
357   case TargetOpcode::G_FSUB:
358   case TargetOpcode::G_FMUL:
359   case TargetOpcode::G_FDIV:
360   case TargetOpcode::G_FMA:
361   case TargetOpcode::G_FPOW:
362   case TargetOpcode::G_FREM:
363   case TargetOpcode::G_FCOS:
364   case TargetOpcode::G_FSIN:
365   case TargetOpcode::G_FLOG10:
366   case TargetOpcode::G_FLOG:
367   case TargetOpcode::G_FLOG2:
368   case TargetOpcode::G_FEXP: {
369     if (Size > 64) {
370       LLVM_DEBUG(dbgs() << "Size " << Size << " too large to legalize.\n");
371       return UnableToLegalize;
372     }
373     Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
374     auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
375     if (Status != Legalized)
376       return Status;
377     break;
378   }
379   case TargetOpcode::G_FPEXT: {
380     // FIXME: Support other floating point types (half, fp128 etc)
381     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
382     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
383     if (ToSize != 64 || FromSize != 32)
384       return UnableToLegalize;
385     LegalizeResult Status = conversionLibcall(
386         MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx));
387     if (Status != Legalized)
388       return Status;
389     break;
390   }
391   case TargetOpcode::G_FPTRUNC: {
392     // FIXME: Support other floating point types (half, fp128 etc)
393     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
394     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
395     if (ToSize != 32 || FromSize != 64)
396       return UnableToLegalize;
397     LegalizeResult Status = conversionLibcall(
398         MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx));
399     if (Status != Legalized)
400       return Status;
401     break;
402   }
403   case TargetOpcode::G_FPTOSI:
404   case TargetOpcode::G_FPTOUI: {
405     // FIXME: Support other types
406     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
407     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
408     if (ToSize != 32 || (FromSize != 32 && FromSize != 64))
409       return UnableToLegalize;
410     LegalizeResult Status = conversionLibcall(
411         MI, MIRBuilder, Type::getInt32Ty(Ctx),
412         FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
413     if (Status != Legalized)
414       return Status;
415     break;
416   }
417   case TargetOpcode::G_SITOFP:
418   case TargetOpcode::G_UITOFP: {
419     // FIXME: Support other types
420     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
421     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
422     if (FromSize != 32 || (ToSize != 32 && ToSize != 64))
423       return UnableToLegalize;
424     LegalizeResult Status = conversionLibcall(
425         MI, MIRBuilder,
426         ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
427         Type::getInt32Ty(Ctx));
428     if (Status != Legalized)
429       return Status;
430     break;
431   }
432   }
433 
434   MI.eraseFromParent();
435   return Legalized;
436 }
437 
438 LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
439                                                               unsigned TypeIdx,
440                                                               LLT NarrowTy) {
441   MIRBuilder.setInstr(MI);
442 
443   uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
444   uint64_t NarrowSize = NarrowTy.getSizeInBits();
445 
446   switch (MI.getOpcode()) {
447   default:
448     return UnableToLegalize;
449   case TargetOpcode::G_IMPLICIT_DEF: {
450     // FIXME: add support for when SizeOp0 isn't an exact multiple of
451     // NarrowSize.
452     if (SizeOp0 % NarrowSize != 0)
453       return UnableToLegalize;
454     int NumParts = SizeOp0 / NarrowSize;
455 
456     SmallVector<unsigned, 2> DstRegs;
457     for (int i = 0; i < NumParts; ++i)
458       DstRegs.push_back(
459           MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());
460 
461     unsigned DstReg = MI.getOperand(0).getReg();
462     if(MRI.getType(DstReg).isVector())
463       MIRBuilder.buildBuildVector(DstReg, DstRegs);
464     else
465       MIRBuilder.buildMerge(DstReg, DstRegs);
466     MI.eraseFromParent();
467     return Legalized;
468   }
469   case TargetOpcode::G_ADD: {
470     // FIXME: add support for when SizeOp0 isn't an exact multiple of
471     // NarrowSize.
472     if (SizeOp0 % NarrowSize != 0)
473       return UnableToLegalize;
474     // Expand in terms of carry-setting/consuming G_ADDE instructions.
475     int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
476 
477     SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
478     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
479     extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
480 
481     unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
482     MIRBuilder.buildConstant(CarryIn, 0);
483 
484     for (int i = 0; i < NumParts; ++i) {
485       unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
486       unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
487 
488       MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
489                             Src2Regs[i], CarryIn);
490 
491       DstRegs.push_back(DstReg);
492       CarryIn = CarryOut;
493     }
494     unsigned DstReg = MI.getOperand(0).getReg();
495     if(MRI.getType(DstReg).isVector())
496       MIRBuilder.buildBuildVector(DstReg, DstRegs);
497     else
498       MIRBuilder.buildMerge(DstReg, DstRegs);
499     MI.eraseFromParent();
500     return Legalized;
501   }
502   case TargetOpcode::G_SUB: {
503     // FIXME: add support for when SizeOp0 isn't an exact multiple of
504     // NarrowSize.
505     if (SizeOp0 % NarrowSize != 0)
506       return UnableToLegalize;
507 
508     int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
509 
510     SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
511     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
512     extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
513 
514     unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
515     unsigned BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
516     MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut},
517                           {Src1Regs[0], Src2Regs[0]});
518     DstRegs.push_back(DstReg);
519     unsigned BorrowIn = BorrowOut;
520     for (int i = 1; i < NumParts; ++i) {
521       DstReg = MRI.createGenericVirtualRegister(NarrowTy);
522       BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
523 
524       MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut},
525                             {Src1Regs[i], Src2Regs[i], BorrowIn});
526 
527       DstRegs.push_back(DstReg);
528       BorrowIn = BorrowOut;
529     }
530     MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
531     MI.eraseFromParent();
532     return Legalized;
533   }
534   case TargetOpcode::G_MUL:
535     return narrowScalarMul(MI, TypeIdx, NarrowTy);
536   case TargetOpcode::G_EXTRACT:
537     return narrowScalarExtract(MI, TypeIdx, NarrowTy);
538   case TargetOpcode::G_INSERT:
539     return narrowScalarInsert(MI, TypeIdx, NarrowTy);
540   case TargetOpcode::G_LOAD: {
541     const auto &MMO = **MI.memoperands_begin();
542     unsigned DstReg = MI.getOperand(0).getReg();
543     LLT DstTy = MRI.getType(DstReg);
544     if (DstTy.isVector())
545       return UnableToLegalize;
546 
547     if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
548       unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
549       auto &MMO = **MI.memoperands_begin();
550       MIRBuilder.buildLoad(TmpReg, MI.getOperand(1).getReg(), MMO);
551       MIRBuilder.buildAnyExt(DstReg, TmpReg);
552       MI.eraseFromParent();
553       return Legalized;
554     }
555 
556     return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
557   }
558   case TargetOpcode::G_ZEXTLOAD:
559   case TargetOpcode::G_SEXTLOAD: {
560     bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
561     unsigned DstReg = MI.getOperand(0).getReg();
562     unsigned PtrReg = MI.getOperand(1).getReg();
563 
564     unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
565     auto &MMO = **MI.memoperands_begin();
566     if (MMO.getSize() * 8 == NarrowSize) {
567       MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
568     } else {
569       unsigned ExtLoad = ZExt ? TargetOpcode::G_ZEXTLOAD
570         : TargetOpcode::G_SEXTLOAD;
571       MIRBuilder.buildInstr(ExtLoad)
572         .addDef(TmpReg)
573         .addUse(PtrReg)
574         .addMemOperand(&MMO);
575     }
576 
577     if (ZExt)
578       MIRBuilder.buildZExt(DstReg, TmpReg);
579     else
580       MIRBuilder.buildSExt(DstReg, TmpReg);
581 
582     MI.eraseFromParent();
583     return Legalized;
584   }
585   case TargetOpcode::G_STORE: {
586     const auto &MMO = **MI.memoperands_begin();
587 
588     unsigned SrcReg = MI.getOperand(0).getReg();
589     LLT SrcTy = MRI.getType(SrcReg);
590     if (SrcTy.isVector())
591       return UnableToLegalize;
592 
593     int NumParts = SizeOp0 / NarrowSize;
594     unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
595     unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
596     if (SrcTy.isVector() && LeftoverBits != 0)
597       return UnableToLegalize;
598 
599     if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
600       unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
601       auto &MMO = **MI.memoperands_begin();
602       MIRBuilder.buildTrunc(TmpReg, SrcReg);
603       MIRBuilder.buildStore(TmpReg, MI.getOperand(1).getReg(), MMO);
604       MI.eraseFromParent();
605       return Legalized;
606     }
607 
608     return reduceLoadStoreWidth(MI, 0, NarrowTy);
609   }
610   case TargetOpcode::G_CONSTANT: {
611     // FIXME: add support for when SizeOp0 isn't an exact multiple of
612     // NarrowSize.
613     if (SizeOp0 % NarrowSize != 0)
614       return UnableToLegalize;
615     int NumParts = SizeOp0 / NarrowSize;
616     const APInt &Cst = MI.getOperand(1).getCImm()->getValue();
617     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
618 
619     SmallVector<unsigned, 2> DstRegs;
620     for (int i = 0; i < NumParts; ++i) {
621       unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
622       ConstantInt *CI =
623           ConstantInt::get(Ctx, Cst.lshr(NarrowSize * i).trunc(NarrowSize));
624       MIRBuilder.buildConstant(DstReg, *CI);
625       DstRegs.push_back(DstReg);
626     }
627     unsigned DstReg = MI.getOperand(0).getReg();
628     if(MRI.getType(DstReg).isVector())
629       MIRBuilder.buildBuildVector(DstReg, DstRegs);
630     else
631       MIRBuilder.buildMerge(DstReg, DstRegs);
632     MI.eraseFromParent();
633     return Legalized;
634   }
635   case TargetOpcode::G_SELECT:
636     return narrowScalarSelect(MI, TypeIdx, NarrowTy);
637   case TargetOpcode::G_AND:
638   case TargetOpcode::G_OR:
639   case TargetOpcode::G_XOR: {
640     // Legalize bitwise operation:
641     // A = BinOp<Ty> B, C
642     // into:
643     // B1, ..., BN = G_UNMERGE_VALUES B
644     // C1, ..., CN = G_UNMERGE_VALUES C
645     // A1 = BinOp<Ty/N> B1, C2
646     // ...
647     // AN = BinOp<Ty/N> BN, CN
648     // A = G_MERGE_VALUES A1, ..., AN
649 
650     // FIXME: add support for when SizeOp0 isn't an exact multiple of
651     // NarrowSize.
652     if (SizeOp0 % NarrowSize != 0)
653       return UnableToLegalize;
654     int NumParts = SizeOp0 / NarrowSize;
655 
656     // List the registers where the destination will be scattered.
657     SmallVector<unsigned, 2> DstRegs;
658     // List the registers where the first argument will be split.
659     SmallVector<unsigned, 2> SrcsReg1;
660     // List the registers where the second argument will be split.
661     SmallVector<unsigned, 2> SrcsReg2;
662     // Create all the temporary registers.
663     for (int i = 0; i < NumParts; ++i) {
664       unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
665       unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy);
666       unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy);
667 
668       DstRegs.push_back(DstReg);
669       SrcsReg1.push_back(SrcReg1);
670       SrcsReg2.push_back(SrcReg2);
671     }
672     // Explode the big arguments into smaller chunks.
673     MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg());
674     MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg());
675 
676     // Do the operation on each small part.
677     for (int i = 0; i < NumParts; ++i)
678       MIRBuilder.buildInstr(MI.getOpcode(), {DstRegs[i]},
679                             {SrcsReg1[i], SrcsReg2[i]});
680 
681     // Gather the destination registers into the final destination.
682     unsigned DstReg = MI.getOperand(0).getReg();
683     if(MRI.getType(DstReg).isVector())
684       MIRBuilder.buildBuildVector(DstReg, DstRegs);
685     else
686       MIRBuilder.buildMerge(DstReg, DstRegs);
687     MI.eraseFromParent();
688     return Legalized;
689   }
690   case TargetOpcode::G_SHL:
691   case TargetOpcode::G_LSHR:
692   case TargetOpcode::G_ASHR:
693     return narrowScalarShift(MI, TypeIdx, NarrowTy);
694   case TargetOpcode::G_CTLZ:
695   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
696   case TargetOpcode::G_CTTZ:
697   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
698   case TargetOpcode::G_CTPOP:
699     if (TypeIdx != 0)
700       return UnableToLegalize; // TODO
701 
702     Observer.changingInstr(MI);
703     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
704     Observer.changedInstr(MI);
705     return Legalized;
706   case TargetOpcode::G_INTTOPTR:
707     if (TypeIdx != 1)
708       return UnableToLegalize;
709 
710     Observer.changingInstr(MI);
711     narrowScalarSrc(MI, NarrowTy, 1);
712     Observer.changedInstr(MI);
713     return Legalized;
714   case TargetOpcode::G_PTRTOINT:
715     if (TypeIdx != 0)
716       return UnableToLegalize;
717 
718     Observer.changingInstr(MI);
719     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
720     Observer.changedInstr(MI);
721     return Legalized;
722   }
723 }
724 
725 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
726                                      unsigned OpIdx, unsigned ExtOpcode) {
727   MachineOperand &MO = MI.getOperand(OpIdx);
728   auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()});
729   MO.setReg(ExtB->getOperand(0).getReg());
730 }
731 
732 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
733                                       unsigned OpIdx) {
734   MachineOperand &MO = MI.getOperand(OpIdx);
735   auto ExtB = MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {NarrowTy},
736                                     {MO.getReg()});
737   MO.setReg(ExtB->getOperand(0).getReg());
738 }
739 
740 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
741                                      unsigned OpIdx, unsigned TruncOpcode) {
742   MachineOperand &MO = MI.getOperand(OpIdx);
743   unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
744   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
745   MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt});
746   MO.setReg(DstExt);
747 }
748 
749 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
750                                       unsigned OpIdx, unsigned ExtOpcode) {
751   MachineOperand &MO = MI.getOperand(OpIdx);
752   unsigned DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
753   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
754   MIRBuilder.buildInstr(ExtOpcode, {MO.getReg()}, {DstTrunc});
755   MO.setReg(DstTrunc);
756 }
757 
758 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
759                                             unsigned OpIdx) {
760   MachineOperand &MO = MI.getOperand(OpIdx);
761   unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
762   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
763   MIRBuilder.buildExtract(MO.getReg(), DstExt, 0);
764   MO.setReg(DstExt);
765 }
766 
767 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
768                                             unsigned OpIdx) {
769   MachineOperand &MO = MI.getOperand(OpIdx);
770 
771   LLT OldTy = MRI.getType(MO.getReg());
772   unsigned OldElts = OldTy.getNumElements();
773   unsigned NewElts = MoreTy.getNumElements();
774 
775   unsigned NumParts = NewElts / OldElts;
776 
777   // Use concat_vectors if the result is a multiple of the number of elements.
778   if (NumParts * OldElts == NewElts) {
779     SmallVector<unsigned, 8> Parts;
780     Parts.push_back(MO.getReg());
781 
782     unsigned ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
783     for (unsigned I = 1; I != NumParts; ++I)
784       Parts.push_back(ImpDef);
785 
786     auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
787     MO.setReg(Concat.getReg(0));
788     return;
789   }
790 
791   unsigned MoreReg = MRI.createGenericVirtualRegister(MoreTy);
792   unsigned ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
793   MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
794   MO.setReg(MoreReg);
795 }
796 
797 LegalizerHelper::LegalizeResult
798 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
799                                         LLT WideTy) {
800   if (TypeIdx != 1)
801     return UnableToLegalize;
802 
803   unsigned DstReg = MI.getOperand(0).getReg();
804   LLT DstTy = MRI.getType(DstReg);
805   if (!DstTy.isScalar())
806     return UnableToLegalize;
807 
808   unsigned NumOps = MI.getNumOperands();
809   unsigned NumSrc = MI.getNumOperands() - 1;
810   unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
811 
812   unsigned Src1 = MI.getOperand(1).getReg();
813   unsigned ResultReg = MIRBuilder.buildZExt(DstTy, Src1)->getOperand(0).getReg();
814 
815   for (unsigned I = 2; I != NumOps; ++I) {
816     const unsigned Offset = (I - 1) * PartSize;
817 
818     unsigned SrcReg = MI.getOperand(I).getReg();
819     assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
820 
821     auto ZextInput = MIRBuilder.buildZExt(DstTy, SrcReg);
822 
823     unsigned NextResult = I + 1 == NumOps ? DstReg :
824       MRI.createGenericVirtualRegister(DstTy);
825 
826     auto ShiftAmt = MIRBuilder.buildConstant(DstTy, Offset);
827     auto Shl = MIRBuilder.buildShl(DstTy, ZextInput, ShiftAmt);
828     MIRBuilder.buildOr(NextResult, ResultReg, Shl);
829     ResultReg = NextResult;
830   }
831 
832   MI.eraseFromParent();
833   return Legalized;
834 }
835 
836 LegalizerHelper::LegalizeResult
837 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
838                                           LLT WideTy) {
839   if (TypeIdx != 0)
840     return UnableToLegalize;
841 
842   unsigned NumDst = MI.getNumOperands() - 1;
843   unsigned SrcReg = MI.getOperand(NumDst).getReg();
844   LLT SrcTy = MRI.getType(SrcReg);
845   if (!SrcTy.isScalar())
846     return UnableToLegalize;
847 
848   unsigned Dst0Reg = MI.getOperand(0).getReg();
849   LLT DstTy = MRI.getType(Dst0Reg);
850   if (!DstTy.isScalar())
851     return UnableToLegalize;
852 
853   unsigned NewSrcSize = NumDst * WideTy.getSizeInBits();
854   LLT NewSrcTy = LLT::scalar(NewSrcSize);
855   unsigned SizeDiff = WideTy.getSizeInBits() - DstTy.getSizeInBits();
856 
857   auto WideSrc = MIRBuilder.buildZExt(NewSrcTy, SrcReg);
858 
859   for (unsigned I = 1; I != NumDst; ++I) {
860     auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, SizeDiff * I);
861     auto Shl = MIRBuilder.buildShl(NewSrcTy, WideSrc, ShiftAmt);
862     WideSrc = MIRBuilder.buildOr(NewSrcTy, WideSrc, Shl);
863   }
864 
865   Observer.changingInstr(MI);
866 
867   MI.getOperand(NumDst).setReg(WideSrc->getOperand(0).getReg());
868   for (unsigned I = 0; I != NumDst; ++I)
869     widenScalarDst(MI, WideTy, I);
870 
871   Observer.changedInstr(MI);
872 
873   return Legalized;
874 }
875 
876 LegalizerHelper::LegalizeResult
877 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
878                                     LLT WideTy) {
879   unsigned DstReg = MI.getOperand(0).getReg();
880   unsigned SrcReg = MI.getOperand(1).getReg();
881   LLT SrcTy = MRI.getType(SrcReg);
882 
883   LLT DstTy = MRI.getType(DstReg);
884   unsigned Offset = MI.getOperand(2).getImm();
885 
886   if (TypeIdx == 0) {
887     if (SrcTy.isVector() || DstTy.isVector())
888       return UnableToLegalize;
889 
890     SrcOp Src(SrcReg);
891     if (SrcTy.isPointer()) {
892       // Extracts from pointers can be handled only if they are really just
893       // simple integers.
894       const DataLayout &DL = MIRBuilder.getDataLayout();
895       if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
896         return UnableToLegalize;
897 
898       LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
899       Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
900       SrcTy = SrcAsIntTy;
901     }
902 
903     if (DstTy.isPointer())
904       return UnableToLegalize;
905 
906     if (Offset == 0) {
907       // Avoid a shift in the degenerate case.
908       MIRBuilder.buildTrunc(DstReg,
909                             MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
910       MI.eraseFromParent();
911       return Legalized;
912     }
913 
914     // Do a shift in the source type.
915     LLT ShiftTy = SrcTy;
916     if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
917       Src = MIRBuilder.buildAnyExt(WideTy, Src);
918       ShiftTy = WideTy;
919     } else if (WideTy.getSizeInBits() > SrcTy.getSizeInBits())
920       return UnableToLegalize;
921 
922     auto LShr = MIRBuilder.buildLShr(
923       ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
924     MIRBuilder.buildTrunc(DstReg, LShr);
925     MI.eraseFromParent();
926     return Legalized;
927   }
928 
929   if (!SrcTy.isVector())
930     return UnableToLegalize;
931 
932   if (DstTy != SrcTy.getElementType())
933     return UnableToLegalize;
934 
935   if (Offset % SrcTy.getScalarSizeInBits() != 0)
936     return UnableToLegalize;
937 
938   Observer.changingInstr(MI);
939   widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
940 
941   MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
942                           Offset);
943   widenScalarDst(MI, WideTy.getScalarType(), 0);
944   Observer.changedInstr(MI);
945   return Legalized;
946 }
947 
948 LegalizerHelper::LegalizeResult
949 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
950                                    LLT WideTy) {
951   if (TypeIdx != 0)
952     return UnableToLegalize;
953   Observer.changingInstr(MI);
954   widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
955   widenScalarDst(MI, WideTy);
956   Observer.changedInstr(MI);
957   return Legalized;
958 }
959 
960 LegalizerHelper::LegalizeResult
961 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
962   MIRBuilder.setInstr(MI);
963 
964   switch (MI.getOpcode()) {
965   default:
966     return UnableToLegalize;
967   case TargetOpcode::G_EXTRACT:
968     return widenScalarExtract(MI, TypeIdx, WideTy);
969   case TargetOpcode::G_INSERT:
970     return widenScalarInsert(MI, TypeIdx, WideTy);
971   case TargetOpcode::G_MERGE_VALUES:
972     return widenScalarMergeValues(MI, TypeIdx, WideTy);
973   case TargetOpcode::G_UNMERGE_VALUES:
974     return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
975   case TargetOpcode::G_UADDO:
976   case TargetOpcode::G_USUBO: {
977     if (TypeIdx == 1)
978       return UnableToLegalize; // TODO
979     auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
980                                          {MI.getOperand(2).getReg()});
981     auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
982                                          {MI.getOperand(3).getReg()});
983     unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
984                           ? TargetOpcode::G_ADD
985                           : TargetOpcode::G_SUB;
986     // Do the arithmetic in the larger type.
987     auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
988     LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
989     APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits());
990     auto AndOp = MIRBuilder.buildInstr(
991         TargetOpcode::G_AND, {WideTy},
992         {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
993     // There is no overflow if the AndOp is the same as NewOp.
994     MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp,
995                          AndOp);
996     // Now trunc the NewOp to the original result.
997     MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
998     MI.eraseFromParent();
999     return Legalized;
1000   }
1001   case TargetOpcode::G_CTTZ:
1002   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1003   case TargetOpcode::G_CTLZ:
1004   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1005   case TargetOpcode::G_CTPOP: {
1006     if (TypeIdx == 0) {
1007       Observer.changingInstr(MI);
1008       widenScalarDst(MI, WideTy, 0);
1009       Observer.changedInstr(MI);
1010       return Legalized;
1011     }
1012 
1013     unsigned SrcReg = MI.getOperand(1).getReg();
1014 
1015     // First ZEXT the input.
1016     auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
1017     LLT CurTy = MRI.getType(SrcReg);
1018     if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
1019       // The count is the same in the larger type except if the original
1020       // value was zero.  This can be handled by setting the bit just off
1021       // the top of the original type.
1022       auto TopBit =
1023           APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
1024       MIBSrc = MIRBuilder.buildOr(
1025         WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
1026     }
1027 
1028     // Perform the operation at the larger size.
1029     auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
1030     // This is already the correct result for CTPOP and CTTZs
1031     if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
1032         MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
1033       // The correct result is NewOp - (Difference in widety and current ty).
1034       unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
1035       MIBNewOp = MIRBuilder.buildInstr(
1036           TargetOpcode::G_SUB, {WideTy},
1037           {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
1038     }
1039 
1040     MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
1041     MI.eraseFromParent();
1042     return Legalized;
1043   }
1044   case TargetOpcode::G_BSWAP: {
1045     Observer.changingInstr(MI);
1046     unsigned DstReg = MI.getOperand(0).getReg();
1047 
1048     unsigned ShrReg = MRI.createGenericVirtualRegister(WideTy);
1049     unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
1050     unsigned ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
1051     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1052 
1053     MI.getOperand(0).setReg(DstExt);
1054 
1055     MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1056 
1057     LLT Ty = MRI.getType(DstReg);
1058     unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
1059     MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
1060     MIRBuilder.buildInstr(TargetOpcode::G_LSHR)
1061       .addDef(ShrReg)
1062       .addUse(DstExt)
1063       .addUse(ShiftAmtReg);
1064 
1065     MIRBuilder.buildTrunc(DstReg, ShrReg);
1066     Observer.changedInstr(MI);
1067     return Legalized;
1068   }
1069   case TargetOpcode::G_ADD:
1070   case TargetOpcode::G_AND:
1071   case TargetOpcode::G_MUL:
1072   case TargetOpcode::G_OR:
1073   case TargetOpcode::G_XOR:
1074   case TargetOpcode::G_SUB:
1075     // Perform operation at larger width (any extension is fines here, high bits
1076     // don't affect the result) and then truncate the result back to the
1077     // original type.
1078     Observer.changingInstr(MI);
1079     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1080     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1081     widenScalarDst(MI, WideTy);
1082     Observer.changedInstr(MI);
1083     return Legalized;
1084 
1085   case TargetOpcode::G_SHL:
1086       Observer.changingInstr(MI);
1087 
1088     if (TypeIdx == 0) {
1089       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1090       widenScalarDst(MI, WideTy);
1091     } else {
1092       assert(TypeIdx == 1);
1093       // The "number of bits to shift" operand must preserve its value as an
1094       // unsigned integer:
1095       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1096     }
1097 
1098     Observer.changedInstr(MI);
1099     return Legalized;
1100 
1101   case TargetOpcode::G_SDIV:
1102   case TargetOpcode::G_SREM:
1103     Observer.changingInstr(MI);
1104     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
1105     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1106     widenScalarDst(MI, WideTy);
1107     Observer.changedInstr(MI);
1108     return Legalized;
1109 
1110   case TargetOpcode::G_ASHR:
1111   case TargetOpcode::G_LSHR:
1112     Observer.changingInstr(MI);
1113 
1114     if (TypeIdx == 0) {
1115       unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
1116         TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
1117 
1118       widenScalarSrc(MI, WideTy, 1, CvtOp);
1119       widenScalarDst(MI, WideTy);
1120     } else {
1121       assert(TypeIdx == 1);
1122       // The "number of bits to shift" operand must preserve its value as an
1123       // unsigned integer:
1124       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1125     }
1126 
1127     Observer.changedInstr(MI);
1128     return Legalized;
1129   case TargetOpcode::G_UDIV:
1130   case TargetOpcode::G_UREM:
1131     Observer.changingInstr(MI);
1132     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1133     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1134     widenScalarDst(MI, WideTy);
1135     Observer.changedInstr(MI);
1136     return Legalized;
1137 
1138   case TargetOpcode::G_SELECT:
1139     Observer.changingInstr(MI);
1140     if (TypeIdx == 0) {
1141       // Perform operation at larger width (any extension is fine here, high
1142       // bits don't affect the result) and then truncate the result back to the
1143       // original type.
1144       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1145       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
1146       widenScalarDst(MI, WideTy);
1147     } else {
1148       bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
1149       // Explicit extension is required here since high bits affect the result.
1150       widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
1151     }
1152     Observer.changedInstr(MI);
1153     return Legalized;
1154 
1155   case TargetOpcode::G_FPTOSI:
1156   case TargetOpcode::G_FPTOUI:
1157     if (TypeIdx != 0)
1158       return UnableToLegalize;
1159     Observer.changingInstr(MI);
1160     widenScalarDst(MI, WideTy);
1161     Observer.changedInstr(MI);
1162     return Legalized;
1163 
1164   case TargetOpcode::G_SITOFP:
1165     if (TypeIdx != 1)
1166       return UnableToLegalize;
1167     Observer.changingInstr(MI);
1168     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
1169     Observer.changedInstr(MI);
1170     return Legalized;
1171 
1172   case TargetOpcode::G_UITOFP:
1173     if (TypeIdx != 1)
1174       return UnableToLegalize;
1175     Observer.changingInstr(MI);
1176     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1177     Observer.changedInstr(MI);
1178     return Legalized;
1179 
1180   case TargetOpcode::G_LOAD:
1181   case TargetOpcode::G_SEXTLOAD:
1182   case TargetOpcode::G_ZEXTLOAD:
1183     Observer.changingInstr(MI);
1184     widenScalarDst(MI, WideTy);
1185     Observer.changedInstr(MI);
1186     return Legalized;
1187 
1188   case TargetOpcode::G_STORE: {
1189     if (TypeIdx != 0)
1190       return UnableToLegalize;
1191 
1192     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1193     if (!isPowerOf2_32(Ty.getSizeInBits()))
1194       return UnableToLegalize;
1195 
1196     Observer.changingInstr(MI);
1197 
1198     unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
1199       TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
1200     widenScalarSrc(MI, WideTy, 0, ExtType);
1201 
1202     Observer.changedInstr(MI);
1203     return Legalized;
1204   }
1205   case TargetOpcode::G_CONSTANT: {
1206     MachineOperand &SrcMO = MI.getOperand(1);
1207     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
1208     const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
1209     Observer.changingInstr(MI);
1210     SrcMO.setCImm(ConstantInt::get(Ctx, Val));
1211 
1212     widenScalarDst(MI, WideTy);
1213     Observer.changedInstr(MI);
1214     return Legalized;
1215   }
1216   case TargetOpcode::G_FCONSTANT: {
1217     MachineOperand &SrcMO = MI.getOperand(1);
1218     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
1219     APFloat Val = SrcMO.getFPImm()->getValueAPF();
1220     bool LosesInfo;
1221     switch (WideTy.getSizeInBits()) {
1222     case 32:
1223       Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
1224                   &LosesInfo);
1225       break;
1226     case 64:
1227       Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
1228                   &LosesInfo);
1229       break;
1230     default:
1231       return UnableToLegalize;
1232     }
1233 
1234     assert(!LosesInfo && "extend should always be lossless");
1235 
1236     Observer.changingInstr(MI);
1237     SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
1238 
1239     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
1240     Observer.changedInstr(MI);
1241     return Legalized;
1242   }
1243   case TargetOpcode::G_IMPLICIT_DEF: {
1244     Observer.changingInstr(MI);
1245     widenScalarDst(MI, WideTy);
1246     Observer.changedInstr(MI);
1247     return Legalized;
1248   }
1249   case TargetOpcode::G_BRCOND:
1250     Observer.changingInstr(MI);
1251     widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
1252     Observer.changedInstr(MI);
1253     return Legalized;
1254 
1255   case TargetOpcode::G_FCMP:
1256     Observer.changingInstr(MI);
1257     if (TypeIdx == 0)
1258       widenScalarDst(MI, WideTy);
1259     else {
1260       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
1261       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
1262     }
1263     Observer.changedInstr(MI);
1264     return Legalized;
1265 
1266   case TargetOpcode::G_ICMP:
1267     Observer.changingInstr(MI);
1268     if (TypeIdx == 0)
1269       widenScalarDst(MI, WideTy);
1270     else {
1271       unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
1272                                MI.getOperand(1).getPredicate()))
1273                                ? TargetOpcode::G_SEXT
1274                                : TargetOpcode::G_ZEXT;
1275       widenScalarSrc(MI, WideTy, 2, ExtOpcode);
1276       widenScalarSrc(MI, WideTy, 3, ExtOpcode);
1277     }
1278     Observer.changedInstr(MI);
1279     return Legalized;
1280 
1281   case TargetOpcode::G_GEP:
1282     assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
1283     Observer.changingInstr(MI);
1284     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1285     Observer.changedInstr(MI);
1286     return Legalized;
1287 
1288   case TargetOpcode::G_PHI: {
1289     assert(TypeIdx == 0 && "Expecting only Idx 0");
1290 
1291     Observer.changingInstr(MI);
1292     for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
1293       MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
1294       MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
1295       widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
1296     }
1297 
1298     MachineBasicBlock &MBB = *MI.getParent();
1299     MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
1300     widenScalarDst(MI, WideTy);
1301     Observer.changedInstr(MI);
1302     return Legalized;
1303   }
1304   case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
1305     if (TypeIdx == 0) {
1306       unsigned VecReg = MI.getOperand(1).getReg();
1307       LLT VecTy = MRI.getType(VecReg);
1308       Observer.changingInstr(MI);
1309 
1310       widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
1311                                      WideTy.getSizeInBits()),
1312                      1, TargetOpcode::G_SEXT);
1313 
1314       widenScalarDst(MI, WideTy, 0);
1315       Observer.changedInstr(MI);
1316       return Legalized;
1317     }
1318 
1319     if (TypeIdx != 2)
1320       return UnableToLegalize;
1321     Observer.changingInstr(MI);
1322     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1323     Observer.changedInstr(MI);
1324     return Legalized;
1325   }
1326   case TargetOpcode::G_FADD:
1327   case TargetOpcode::G_FMUL:
1328   case TargetOpcode::G_FSUB:
1329   case TargetOpcode::G_FMA:
1330   case TargetOpcode::G_FNEG:
1331   case TargetOpcode::G_FABS:
1332   case TargetOpcode::G_FCANONICALIZE:
1333   case TargetOpcode::G_FDIV:
1334   case TargetOpcode::G_FREM:
1335   case TargetOpcode::G_FCEIL:
1336   case TargetOpcode::G_FFLOOR:
1337   case TargetOpcode::G_FCOS:
1338   case TargetOpcode::G_FSIN:
1339   case TargetOpcode::G_FLOG10:
1340   case TargetOpcode::G_FLOG:
1341   case TargetOpcode::G_FLOG2:
1342   case TargetOpcode::G_FSQRT:
1343   case TargetOpcode::G_FEXP:
1344     assert(TypeIdx == 0);
1345     Observer.changingInstr(MI);
1346 
1347     for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
1348       widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
1349 
1350     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
1351     Observer.changedInstr(MI);
1352     return Legalized;
1353   case TargetOpcode::G_INTTOPTR:
1354     if (TypeIdx != 1)
1355       return UnableToLegalize;
1356 
1357     Observer.changingInstr(MI);
1358     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1359     Observer.changedInstr(MI);
1360     return Legalized;
1361   case TargetOpcode::G_PTRTOINT:
1362     if (TypeIdx != 0)
1363       return UnableToLegalize;
1364 
1365     Observer.changingInstr(MI);
1366     widenScalarDst(MI, WideTy, 0);
1367     Observer.changedInstr(MI);
1368     return Legalized;
1369   }
1370 }
1371 
1372 LegalizerHelper::LegalizeResult
1373 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
1374   using namespace TargetOpcode;
1375   MIRBuilder.setInstr(MI);
1376 
1377   switch(MI.getOpcode()) {
1378   default:
1379     return UnableToLegalize;
1380   case TargetOpcode::G_SREM:
1381   case TargetOpcode::G_UREM: {
1382     unsigned QuotReg = MRI.createGenericVirtualRegister(Ty);
1383     MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
1384         .addDef(QuotReg)
1385         .addUse(MI.getOperand(1).getReg())
1386         .addUse(MI.getOperand(2).getReg());
1387 
1388     unsigned ProdReg = MRI.createGenericVirtualRegister(Ty);
1389     MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
1390     MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
1391                         ProdReg);
1392     MI.eraseFromParent();
1393     return Legalized;
1394   }
1395   case TargetOpcode::G_SMULO:
1396   case TargetOpcode::G_UMULO: {
1397     // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
1398     // result.
1399     unsigned Res = MI.getOperand(0).getReg();
1400     unsigned Overflow = MI.getOperand(1).getReg();
1401     unsigned LHS = MI.getOperand(2).getReg();
1402     unsigned RHS = MI.getOperand(3).getReg();
1403 
1404     MIRBuilder.buildMul(Res, LHS, RHS);
1405 
1406     unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
1407                           ? TargetOpcode::G_SMULH
1408                           : TargetOpcode::G_UMULH;
1409 
1410     unsigned HiPart = MRI.createGenericVirtualRegister(Ty);
1411     MIRBuilder.buildInstr(Opcode)
1412       .addDef(HiPart)
1413       .addUse(LHS)
1414       .addUse(RHS);
1415 
1416     unsigned Zero = MRI.createGenericVirtualRegister(Ty);
1417     MIRBuilder.buildConstant(Zero, 0);
1418 
1419     // For *signed* multiply, overflow is detected by checking:
1420     // (hi != (lo >> bitwidth-1))
1421     if (Opcode == TargetOpcode::G_SMULH) {
1422       unsigned Shifted = MRI.createGenericVirtualRegister(Ty);
1423       unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty);
1424       MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
1425       MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
1426         .addDef(Shifted)
1427         .addUse(Res)
1428         .addUse(ShiftAmt);
1429       MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
1430     } else {
1431       MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
1432     }
1433     MI.eraseFromParent();
1434     return Legalized;
1435   }
1436   case TargetOpcode::G_FNEG: {
1437     // TODO: Handle vector types once we are able to
1438     // represent them.
1439     if (Ty.isVector())
1440       return UnableToLegalize;
1441     unsigned Res = MI.getOperand(0).getReg();
1442     Type *ZeroTy;
1443     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
1444     switch (Ty.getSizeInBits()) {
1445     case 16:
1446       ZeroTy = Type::getHalfTy(Ctx);
1447       break;
1448     case 32:
1449       ZeroTy = Type::getFloatTy(Ctx);
1450       break;
1451     case 64:
1452       ZeroTy = Type::getDoubleTy(Ctx);
1453       break;
1454     case 128:
1455       ZeroTy = Type::getFP128Ty(Ctx);
1456       break;
1457     default:
1458       llvm_unreachable("unexpected floating-point type");
1459     }
1460     ConstantFP &ZeroForNegation =
1461         *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
1462     auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
1463     MIRBuilder.buildInstr(TargetOpcode::G_FSUB)
1464         .addDef(Res)
1465         .addUse(Zero->getOperand(0).getReg())
1466         .addUse(MI.getOperand(1).getReg());
1467     MI.eraseFromParent();
1468     return Legalized;
1469   }
1470   case TargetOpcode::G_FSUB: {
1471     // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
1472     // First, check if G_FNEG is marked as Lower. If so, we may
1473     // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
1474     if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
1475       return UnableToLegalize;
1476     unsigned Res = MI.getOperand(0).getReg();
1477     unsigned LHS = MI.getOperand(1).getReg();
1478     unsigned RHS = MI.getOperand(2).getReg();
1479     unsigned Neg = MRI.createGenericVirtualRegister(Ty);
1480     MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
1481     MIRBuilder.buildInstr(TargetOpcode::G_FADD)
1482         .addDef(Res)
1483         .addUse(LHS)
1484         .addUse(Neg);
1485     MI.eraseFromParent();
1486     return Legalized;
1487   }
1488   case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1489     unsigned OldValRes = MI.getOperand(0).getReg();
1490     unsigned SuccessRes = MI.getOperand(1).getReg();
1491     unsigned Addr = MI.getOperand(2).getReg();
1492     unsigned CmpVal = MI.getOperand(3).getReg();
1493     unsigned NewVal = MI.getOperand(4).getReg();
1494     MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
1495                                   **MI.memoperands_begin());
1496     MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
1497     MI.eraseFromParent();
1498     return Legalized;
1499   }
1500   case TargetOpcode::G_LOAD:
1501   case TargetOpcode::G_SEXTLOAD:
1502   case TargetOpcode::G_ZEXTLOAD: {
1503     // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
1504     unsigned DstReg = MI.getOperand(0).getReg();
1505     unsigned PtrReg = MI.getOperand(1).getReg();
1506     LLT DstTy = MRI.getType(DstReg);
1507     auto &MMO = **MI.memoperands_begin();
1508 
1509     if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) {
1510       // In the case of G_LOAD, this was a non-extending load already and we're
1511       // about to lower to the same instruction.
1512       if (MI.getOpcode() == TargetOpcode::G_LOAD)
1513           return UnableToLegalize;
1514       MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
1515       MI.eraseFromParent();
1516       return Legalized;
1517     }
1518 
1519     if (DstTy.isScalar()) {
1520       unsigned TmpReg = MRI.createGenericVirtualRegister(
1521           LLT::scalar(MMO.getSize() /* in bytes */ * 8));
1522       MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1523       switch (MI.getOpcode()) {
1524       default:
1525         llvm_unreachable("Unexpected opcode");
1526       case TargetOpcode::G_LOAD:
1527         MIRBuilder.buildAnyExt(DstReg, TmpReg);
1528         break;
1529       case TargetOpcode::G_SEXTLOAD:
1530         MIRBuilder.buildSExt(DstReg, TmpReg);
1531         break;
1532       case TargetOpcode::G_ZEXTLOAD:
1533         MIRBuilder.buildZExt(DstReg, TmpReg);
1534         break;
1535       }
1536       MI.eraseFromParent();
1537       return Legalized;
1538     }
1539 
1540     return UnableToLegalize;
1541   }
1542   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1543   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1544   case TargetOpcode::G_CTLZ:
1545   case TargetOpcode::G_CTTZ:
1546   case TargetOpcode::G_CTPOP:
1547     return lowerBitCount(MI, TypeIdx, Ty);
1548   case G_UADDE: {
1549     unsigned Res = MI.getOperand(0).getReg();
1550     unsigned CarryOut = MI.getOperand(1).getReg();
1551     unsigned LHS = MI.getOperand(2).getReg();
1552     unsigned RHS = MI.getOperand(3).getReg();
1553     unsigned CarryIn = MI.getOperand(4).getReg();
1554 
1555     unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
1556     unsigned ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);
1557 
1558     MIRBuilder.buildAdd(TmpRes, LHS, RHS);
1559     MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
1560     MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
1561     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
1562 
1563     MI.eraseFromParent();
1564     return Legalized;
1565   }
1566   case G_USUBO: {
1567     unsigned Res = MI.getOperand(0).getReg();
1568     unsigned BorrowOut = MI.getOperand(1).getReg();
1569     unsigned LHS = MI.getOperand(2).getReg();
1570     unsigned RHS = MI.getOperand(3).getReg();
1571 
1572     MIRBuilder.buildSub(Res, LHS, RHS);
1573     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
1574 
1575     MI.eraseFromParent();
1576     return Legalized;
1577   }
1578   case G_USUBE: {
1579     unsigned Res = MI.getOperand(0).getReg();
1580     unsigned BorrowOut = MI.getOperand(1).getReg();
1581     unsigned LHS = MI.getOperand(2).getReg();
1582     unsigned RHS = MI.getOperand(3).getReg();
1583     unsigned BorrowIn = MI.getOperand(4).getReg();
1584 
1585     unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
1586     unsigned ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty);
1587     unsigned LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
1588     unsigned LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
1589 
1590     MIRBuilder.buildSub(TmpRes, LHS, RHS);
1591     MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn);
1592     MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
1593     MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS);
1594     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS);
1595     MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
1596 
1597     MI.eraseFromParent();
1598     return Legalized;
1599   }
1600   }
1601 }
1602 
1603 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
1604     MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
1605   SmallVector<unsigned, 2> DstRegs;
1606 
1607   unsigned NarrowSize = NarrowTy.getSizeInBits();
1608   unsigned DstReg = MI.getOperand(0).getReg();
1609   unsigned Size = MRI.getType(DstReg).getSizeInBits();
1610   int NumParts = Size / NarrowSize;
1611   // FIXME: Don't know how to handle the situation where the small vectors
1612   // aren't all the same size yet.
1613   if (Size % NarrowSize != 0)
1614     return UnableToLegalize;
1615 
1616   for (int i = 0; i < NumParts; ++i) {
1617     unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1618     MIRBuilder.buildUndef(TmpReg);
1619     DstRegs.push_back(TmpReg);
1620   }
1621 
1622   if (NarrowTy.isVector())
1623     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
1624   else
1625     MIRBuilder.buildBuildVector(DstReg, DstRegs);
1626 
1627   MI.eraseFromParent();
1628   return Legalized;
1629 }
1630 
1631 LegalizerHelper::LegalizeResult
1632 LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
1633                                           LLT NarrowTy) {
1634   const unsigned Opc = MI.getOpcode();
1635   const unsigned NumOps = MI.getNumOperands() - 1;
1636   const unsigned NarrowSize = NarrowTy.getSizeInBits();
1637   const unsigned DstReg = MI.getOperand(0).getReg();
1638   const unsigned Flags = MI.getFlags();
1639   const LLT DstTy = MRI.getType(DstReg);
1640   const unsigned Size = DstTy.getSizeInBits();
1641   const int NumParts = Size / NarrowSize;
1642   const LLT EltTy = DstTy.getElementType();
1643   const unsigned EltSize = EltTy.getSizeInBits();
1644   const unsigned BitsForNumParts = NarrowSize * NumParts;
1645 
1646   // Check if we have any leftovers. If we do, then only handle the case where
1647   // the leftover is one element.
1648   if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size)
1649     return UnableToLegalize;
1650 
1651   if (BitsForNumParts != Size) {
1652     unsigned AccumDstReg = MRI.createGenericVirtualRegister(DstTy);
1653     MIRBuilder.buildUndef(AccumDstReg);
1654 
1655     // Handle the pieces which evenly divide into the requested type with
1656     // extract/op/insert sequence.
1657     for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) {
1658       SmallVector<SrcOp, 4> SrcOps;
1659       for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
1660         unsigned PartOpReg = MRI.createGenericVirtualRegister(NarrowTy);
1661         MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), Offset);
1662         SrcOps.push_back(PartOpReg);
1663       }
1664 
1665       unsigned PartDstReg = MRI.createGenericVirtualRegister(NarrowTy);
1666       MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
1667 
1668       unsigned PartInsertReg = MRI.createGenericVirtualRegister(DstTy);
1669       MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset);
1670       AccumDstReg = PartInsertReg;
1671     }
1672 
1673     // Handle the remaining element sized leftover piece.
1674     SmallVector<SrcOp, 4> SrcOps;
1675     for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
1676       unsigned PartOpReg = MRI.createGenericVirtualRegister(EltTy);
1677       MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(),
1678                               BitsForNumParts);
1679       SrcOps.push_back(PartOpReg);
1680     }
1681 
1682     unsigned PartDstReg = MRI.createGenericVirtualRegister(EltTy);
1683     MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
1684     MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts);
1685     MI.eraseFromParent();
1686 
1687     return Legalized;
1688   }
1689 
1690   SmallVector<unsigned, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
1691 
1692   extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs);
1693 
1694   if (NumOps >= 2)
1695     extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs);
1696 
1697   if (NumOps >= 3)
1698     extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs);
1699 
1700   for (int i = 0; i < NumParts; ++i) {
1701     unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
1702 
1703     if (NumOps == 1)
1704       MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags);
1705     else if (NumOps == 2) {
1706       MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags);
1707     } else if (NumOps == 3) {
1708       MIRBuilder.buildInstr(Opc, {DstReg},
1709                             {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags);
1710     }
1711 
1712     DstRegs.push_back(DstReg);
1713   }
1714 
1715   if (NarrowTy.isVector())
1716     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
1717   else
1718     MIRBuilder.buildBuildVector(DstReg, DstRegs);
1719 
1720   MI.eraseFromParent();
1721   return Legalized;
1722 }
1723 
1724 // Handle splitting vector operations which need to have the same number of
1725 // elements in each type index, but each type index may have a different element
1726 // type.
1727 //
1728 // e.g.  <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
1729 //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
1730 //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
1731 //
1732 // Also handles some irregular breakdown cases, e.g.
1733 // e.g.  <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
1734 //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
1735 //             s64 = G_SHL s64, s32
1736 LegalizerHelper::LegalizeResult
1737 LegalizerHelper::fewerElementsVectorMultiEltType(
1738   MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
1739   if (TypeIdx != 0)
1740     return UnableToLegalize;
1741 
1742   const LLT NarrowTy0 = NarrowTyArg;
1743   const unsigned NewNumElts =
1744       NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;
1745 
1746   const unsigned DstReg = MI.getOperand(0).getReg();
1747   LLT DstTy = MRI.getType(DstReg);
1748   LLT LeftoverTy0;
1749 
1750   // All of the operands need to have the same number of elements, so if we can
1751   // determine a type breakdown for the result type, we can for all of the
1752   // source types.
1753   int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0);
1754   if (NumParts < 0)
1755     return UnableToLegalize;
1756 
1757   SmallVector<MachineInstrBuilder, 4> NewInsts;
1758 
1759   SmallVector<unsigned, 4> DstRegs, LeftoverDstRegs;
1760   SmallVector<unsigned, 4> PartRegs, LeftoverRegs;
1761 
1762   for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
1763     LLT LeftoverTy;
1764     unsigned SrcReg = MI.getOperand(I).getReg();
1765     LLT SrcTyI = MRI.getType(SrcReg);
1766     LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
1767     LLT LeftoverTyI;
1768 
1769     // Split this operand into the requested typed registers, and any leftover
1770     // required to reproduce the original type.
1771     if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
1772                       LeftoverRegs))
1773       return UnableToLegalize;
1774 
1775     if (I == 1) {
1776       // For the first operand, create an instruction for each part and setup
1777       // the result.
1778       for (unsigned PartReg : PartRegs) {
1779         unsigned PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
1780         NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
1781                                .addDef(PartDstReg)
1782                                .addUse(PartReg));
1783         DstRegs.push_back(PartDstReg);
1784       }
1785 
1786       for (unsigned LeftoverReg : LeftoverRegs) {
1787         unsigned PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
1788         NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
1789                                .addDef(PartDstReg)
1790                                .addUse(LeftoverReg));
1791         LeftoverDstRegs.push_back(PartDstReg);
1792       }
1793     } else {
1794       assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());
1795 
1796       // Add the newly created operand splits to the existing instructions. The
1797       // odd-sized pieces are ordered after the requested NarrowTyArg sized
1798       // pieces.
1799       unsigned InstCount = 0;
1800       for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
1801         NewInsts[InstCount++].addUse(PartRegs[J]);
1802       for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
1803         NewInsts[InstCount++].addUse(LeftoverRegs[J]);
1804     }
1805 
1806     PartRegs.clear();
1807     LeftoverRegs.clear();
1808   }
1809 
1810   // Insert the newly built operations and rebuild the result register.
1811   for (auto &MIB : NewInsts)
1812     MIRBuilder.insertInstr(MIB);
1813 
1814   insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);
1815 
1816   MI.eraseFromParent();
1817   return Legalized;
1818 }
1819 
1820 LegalizerHelper::LegalizeResult
1821 LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
1822                                           LLT NarrowTy) {
1823   if (TypeIdx != 0)
1824     return UnableToLegalize;
1825 
1826   unsigned DstReg = MI.getOperand(0).getReg();
1827   unsigned SrcReg = MI.getOperand(1).getReg();
1828   LLT DstTy = MRI.getType(DstReg);
1829   LLT SrcTy = MRI.getType(SrcReg);
1830 
1831   LLT NarrowTy0 = NarrowTy;
1832   LLT NarrowTy1;
1833   unsigned NumParts;
1834 
1835   if (NarrowTy.isVector()) {
1836     // Uneven breakdown not handled.
1837     NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
1838     if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
1839       return UnableToLegalize;
1840 
1841     NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits());
1842   } else {
1843     NumParts = DstTy.getNumElements();
1844     NarrowTy1 = SrcTy.getElementType();
1845   }
1846 
1847   SmallVector<unsigned, 4> SrcRegs, DstRegs;
1848   extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
1849 
1850   for (unsigned I = 0; I < NumParts; ++I) {
1851     unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
1852     MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode())
1853       .addDef(DstReg)
1854       .addUse(SrcRegs[I]);
1855 
1856     NewInst->setFlags(MI.getFlags());
1857     DstRegs.push_back(DstReg);
1858   }
1859 
1860   if (NarrowTy.isVector())
1861     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
1862   else
1863     MIRBuilder.buildBuildVector(DstReg, DstRegs);
1864 
1865   MI.eraseFromParent();
1866   return Legalized;
1867 }
1868 
1869 LegalizerHelper::LegalizeResult
1870 LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
1871                                         LLT NarrowTy) {
1872   unsigned DstReg = MI.getOperand(0).getReg();
1873   unsigned Src0Reg = MI.getOperand(2).getReg();
1874   LLT DstTy = MRI.getType(DstReg);
1875   LLT SrcTy = MRI.getType(Src0Reg);
1876 
1877   unsigned NumParts;
1878   LLT NarrowTy0, NarrowTy1;
1879 
1880   if (TypeIdx == 0) {
1881     unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
1882     unsigned OldElts = DstTy.getNumElements();
1883 
1884     NarrowTy0 = NarrowTy;
1885     NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
1886     NarrowTy1 = NarrowTy.isVector() ?
1887       LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
1888       SrcTy.getElementType();
1889 
1890   } else {
1891     unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
1892     unsigned OldElts = SrcTy.getNumElements();
1893 
1894     NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
1895       NarrowTy.getNumElements();
1896     NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
1897                             DstTy.getScalarSizeInBits());
1898     NarrowTy1 = NarrowTy;
1899   }
1900 
1901   // FIXME: Don't know how to handle the situation where the small vectors
1902   // aren't all the same size yet.
1903   if (NarrowTy1.isVector() &&
1904       NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
1905     return UnableToLegalize;
1906 
1907   CmpInst::Predicate Pred
1908     = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1909 
1910   SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
1911   extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
1912   extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);
1913 
1914   for (unsigned I = 0; I < NumParts; ++I) {
1915     unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
1916     DstRegs.push_back(DstReg);
1917 
1918     if (MI.getOpcode() == TargetOpcode::G_ICMP)
1919       MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
1920     else {
1921       MachineInstr *NewCmp
1922         = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
1923       NewCmp->setFlags(MI.getFlags());
1924     }
1925   }
1926 
1927   if (NarrowTy1.isVector())
1928     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
1929   else
1930     MIRBuilder.buildBuildVector(DstReg, DstRegs);
1931 
1932   MI.eraseFromParent();
1933   return Legalized;
1934 }
1935 
1936 LegalizerHelper::LegalizeResult
1937 LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
1938                                            LLT NarrowTy) {
1939   unsigned DstReg = MI.getOperand(0).getReg();
1940   unsigned CondReg = MI.getOperand(1).getReg();
1941 
1942   unsigned NumParts = 0;
1943   LLT NarrowTy0, NarrowTy1;
1944 
1945   LLT DstTy = MRI.getType(DstReg);
1946   LLT CondTy = MRI.getType(CondReg);
1947   unsigned Size = DstTy.getSizeInBits();
1948 
1949   assert(TypeIdx == 0 || CondTy.isVector());
1950 
1951   if (TypeIdx == 0) {
1952     NarrowTy0 = NarrowTy;
1953     NarrowTy1 = CondTy;
1954 
1955     unsigned NarrowSize = NarrowTy0.getSizeInBits();
1956     // FIXME: Don't know how to handle the situation where the small vectors
1957     // aren't all the same size yet.
1958     if (Size % NarrowSize != 0)
1959       return UnableToLegalize;
1960 
1961     NumParts = Size / NarrowSize;
1962 
1963     // Need to break down the condition type
1964     if (CondTy.isVector()) {
1965       if (CondTy.getNumElements() == NumParts)
1966         NarrowTy1 = CondTy.getElementType();
1967       else
1968         NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
1969                                 CondTy.getScalarSizeInBits());
1970     }
1971   } else {
1972     NumParts = CondTy.getNumElements();
1973     if (NarrowTy.isVector()) {
1974       // TODO: Handle uneven breakdown.
1975       if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
1976         return UnableToLegalize;
1977 
1978       return UnableToLegalize;
1979     } else {
1980       NarrowTy0 = DstTy.getElementType();
1981       NarrowTy1 = NarrowTy;
1982     }
1983   }
1984 
1985   SmallVector<unsigned, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
1986   if (CondTy.isVector())
1987     extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
1988 
1989   extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
1990   extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
1991 
1992   for (unsigned i = 0; i < NumParts; ++i) {
1993     unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
1994     MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
1995                            Src1Regs[i], Src2Regs[i]);
1996     DstRegs.push_back(DstReg);
1997   }
1998 
1999   if (NarrowTy0.isVector())
2000     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2001   else
2002     MIRBuilder.buildBuildVector(DstReg, DstRegs);
2003 
2004   MI.eraseFromParent();
2005   return Legalized;
2006 }
2007 
2008 LegalizerHelper::LegalizeResult
2009 LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
2010                                       LLT NarrowTy) {
2011   // FIXME: Don't know how to handle secondary types yet.
2012   if (TypeIdx != 0)
2013     return UnableToLegalize;
2014 
2015   MachineMemOperand *MMO = *MI.memoperands_begin();
2016 
2017   // This implementation doesn't work for atomics. Give up instead of doing
2018   // something invalid.
2019   if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
2020       MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
2021     return UnableToLegalize;
2022 
2023   bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
2024   unsigned ValReg = MI.getOperand(0).getReg();
2025   unsigned AddrReg = MI.getOperand(1).getReg();
2026   LLT ValTy = MRI.getType(ValReg);
2027 
2028   int NumParts = -1;
2029   LLT LeftoverTy;
2030   SmallVector<unsigned, 8> NarrowRegs, NarrowLeftoverRegs;
2031   if (IsLoad) {
2032     NumParts = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
2033   } else {
2034     if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
2035                      NarrowLeftoverRegs))
2036       NumParts = NarrowRegs.size();
2037   }
2038 
2039   if (NumParts == -1)
2040     return UnableToLegalize;
2041 
2042   const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
2043 
2044   unsigned TotalSize = ValTy.getSizeInBits();
2045 
2046   // Split the load/store into PartTy sized pieces starting at Offset. If this
2047   // is a load, return the new registers in ValRegs. For a store, each elements
2048   // of ValRegs should be PartTy. Returns the next offset that needs to be
2049   // handled.
2050   auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<unsigned> &ValRegs,
2051                              unsigned Offset) -> unsigned {
2052     MachineFunction &MF = MIRBuilder.getMF();
2053     unsigned PartSize = PartTy.getSizeInBits();
2054     for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
2055          Offset += PartSize, ++Idx) {
2056       unsigned ByteSize = PartSize / 8;
2057       unsigned ByteOffset = Offset / 8;
2058       unsigned NewAddrReg = 0;
2059 
2060       MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
2061 
2062       MachineMemOperand *NewMMO =
2063         MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);
2064 
2065       if (IsLoad) {
2066         unsigned Dst = MRI.createGenericVirtualRegister(PartTy);
2067         ValRegs.push_back(Dst);
2068         MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
2069       } else {
2070         MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
2071       }
2072     }
2073 
2074     return Offset;
2075   };
2076 
2077   unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);
2078 
2079   // Handle the rest of the register if this isn't an even type breakdown.
2080   if (LeftoverTy.isValid())
2081     splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);
2082 
2083   if (IsLoad) {
2084     insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
2085                 LeftoverTy, NarrowLeftoverRegs);
2086   }
2087 
2088   MI.eraseFromParent();
2089   return Legalized;
2090 }
2091 
2092 LegalizerHelper::LegalizeResult
2093 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
2094                                      LLT NarrowTy) {
2095   using namespace TargetOpcode;
2096 
2097   MIRBuilder.setInstr(MI);
2098   switch (MI.getOpcode()) {
2099   case G_IMPLICIT_DEF:
2100     return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
2101   case G_AND:
2102   case G_OR:
2103   case G_XOR:
2104   case G_ADD:
2105   case G_SUB:
2106   case G_MUL:
2107   case G_SMULH:
2108   case G_UMULH:
2109   case G_FADD:
2110   case G_FMUL:
2111   case G_FSUB:
2112   case G_FNEG:
2113   case G_FABS:
2114   case G_FCANONICALIZE:
2115   case G_FDIV:
2116   case G_FREM:
2117   case G_FMA:
2118   case G_FPOW:
2119   case G_FEXP:
2120   case G_FEXP2:
2121   case G_FLOG:
2122   case G_FLOG2:
2123   case G_FLOG10:
2124   case G_FCEIL:
2125   case G_FFLOOR:
2126   case G_INTRINSIC_ROUND:
2127   case G_INTRINSIC_TRUNC:
2128   case G_FCOS:
2129   case G_FSIN:
2130   case G_FSQRT:
2131   case G_BSWAP:
2132   case G_CTLZ:
2133     return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
2134   case G_SHL:
2135   case G_LSHR:
2136   case G_ASHR:
2137     return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
2138   case G_ZEXT:
2139   case G_SEXT:
2140   case G_ANYEXT:
2141   case G_FPEXT:
2142   case G_FPTRUNC:
2143   case G_SITOFP:
2144   case G_UITOFP:
2145   case G_FPTOSI:
2146   case G_FPTOUI:
2147   case G_INTTOPTR:
2148   case G_PTRTOINT:
2149   case G_ADDRSPACE_CAST:
2150     return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
2151   case G_ICMP:
2152   case G_FCMP:
2153     return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
2154   case G_SELECT:
2155     return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
2156   case G_LOAD:
2157   case G_STORE:
2158     return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
2159   default:
2160     return UnableToLegalize;
2161   }
2162 }
2163 
2164 LegalizerHelper::LegalizeResult
2165 LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
2166                                              const LLT HalfTy, const LLT AmtTy) {
2167 
2168   unsigned InL = MRI.createGenericVirtualRegister(HalfTy);
2169   unsigned InH = MRI.createGenericVirtualRegister(HalfTy);
2170   MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());
2171 
2172   if (Amt.isNullValue()) {
2173     MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {InL, InH});
2174     MI.eraseFromParent();
2175     return Legalized;
2176   }
2177 
2178   LLT NVT = HalfTy;
2179   unsigned NVTBits = HalfTy.getSizeInBits();
2180   unsigned VTBits = 2 * NVTBits;
2181 
2182   SrcOp Lo(0), Hi(0);
2183   if (MI.getOpcode() == TargetOpcode::G_SHL) {
2184     if (Amt.ugt(VTBits)) {
2185       Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
2186     } else if (Amt.ugt(NVTBits)) {
2187       Lo = MIRBuilder.buildConstant(NVT, 0);
2188       Hi = MIRBuilder.buildShl(NVT, InL,
2189                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
2190     } else if (Amt == NVTBits) {
2191       Lo = MIRBuilder.buildConstant(NVT, 0);
2192       Hi = InL;
2193     } else {
2194       Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
2195       auto OrLHS =
2196           MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
2197       auto OrRHS = MIRBuilder.buildLShr(
2198           NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
2199       Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
2200     }
2201   } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2202     if (Amt.ugt(VTBits)) {
2203       Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
2204     } else if (Amt.ugt(NVTBits)) {
2205       Lo = MIRBuilder.buildLShr(NVT, InH,
2206                                 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
2207       Hi = MIRBuilder.buildConstant(NVT, 0);
2208     } else if (Amt == NVTBits) {
2209       Lo = InH;
2210       Hi = MIRBuilder.buildConstant(NVT, 0);
2211     } else {
2212       auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
2213 
2214       auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
2215       auto OrRHS = MIRBuilder.buildShl(
2216           NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
2217 
2218       Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
2219       Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
2220     }
2221   } else {
2222     if (Amt.ugt(VTBits)) {
2223       Hi = Lo = MIRBuilder.buildAShr(
2224           NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
2225     } else if (Amt.ugt(NVTBits)) {
2226       Lo = MIRBuilder.buildAShr(NVT, InH,
2227                                 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
2228       Hi = MIRBuilder.buildAShr(NVT, InH,
2229                                 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
2230     } else if (Amt == NVTBits) {
2231       Lo = InH;
2232       Hi = MIRBuilder.buildAShr(NVT, InH,
2233                                 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
2234     } else {
2235       auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
2236 
2237       auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
2238       auto OrRHS = MIRBuilder.buildShl(
2239           NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
2240 
2241       Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
2242       Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
2243     }
2244   }
2245 
2246   MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {Lo.getReg(), Hi.getReg()});
2247   MI.eraseFromParent();
2248 
2249   return Legalized;
2250 }
2251 
2252 // TODO: Optimize if constant shift amount.
2253 LegalizerHelper::LegalizeResult
2254 LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
2255                                    LLT RequestedTy) {
2256   if (TypeIdx == 1) {
2257     Observer.changingInstr(MI);
2258     narrowScalarSrc(MI, RequestedTy, 2);
2259     Observer.changedInstr(MI);
2260     return Legalized;
2261   }
2262 
2263   unsigned DstReg = MI.getOperand(0).getReg();
2264   LLT DstTy = MRI.getType(DstReg);
2265   if (DstTy.isVector())
2266     return UnableToLegalize;
2267 
2268   unsigned Amt = MI.getOperand(2).getReg();
2269   LLT ShiftAmtTy = MRI.getType(Amt);
2270   const unsigned DstEltSize = DstTy.getScalarSizeInBits();
2271   if (DstEltSize % 2 != 0)
2272     return UnableToLegalize;
2273 
2274   // Ignore the input type. We can only go to exactly half the size of the
2275   // input. If that isn't small enough, the resulting pieces will be further
2276   // legalized.
2277   const unsigned NewBitSize = DstEltSize / 2;
2278   const LLT HalfTy = LLT::scalar(NewBitSize);
2279   const LLT CondTy = LLT::scalar(1);
2280 
2281   if (const MachineInstr *KShiftAmt =
2282           getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
2283     return narrowScalarShiftByConstant(
2284         MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
2285   }
2286 
2287   // TODO: Expand with known bits.
2288 
2289   // Handle the fully general expansion by an unknown amount.
2290   auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
2291 
2292   unsigned InL = MRI.createGenericVirtualRegister(HalfTy);
2293   unsigned InH = MRI.createGenericVirtualRegister(HalfTy);
2294   MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());
2295 
2296   auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
2297   auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
2298 
2299   auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
2300   auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
2301   auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
2302 
2303   unsigned ResultRegs[2];
2304   switch (MI.getOpcode()) {
2305   case TargetOpcode::G_SHL: {
2306     // Short: ShAmt < NewBitSize
2307     auto LoS = MIRBuilder.buildShl(HalfTy, InH, Amt);
2308 
2309     auto OrLHS = MIRBuilder.buildShl(HalfTy, InH, Amt);
2310     auto OrRHS = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
2311     auto HiS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
2312 
2313     // Long: ShAmt >= NewBitSize
2314     auto LoL = MIRBuilder.buildConstant(HalfTy, 0);         // Lo part is zero.
2315     auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
2316 
2317     auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
2318     auto Hi = MIRBuilder.buildSelect(
2319         HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
2320 
2321     ResultRegs[0] = Lo.getReg(0);
2322     ResultRegs[1] = Hi.getReg(0);
2323     break;
2324   }
2325   case TargetOpcode::G_LSHR: {
2326     // Short: ShAmt < NewBitSize
2327     auto HiS = MIRBuilder.buildLShr(HalfTy, InH, Amt);
2328 
2329     auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
2330     auto OrRHS = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
2331     auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
2332 
2333     // Long: ShAmt >= NewBitSize
2334     auto HiL = MIRBuilder.buildConstant(HalfTy, 0);          // Hi part is zero.
2335     auto LoL = MIRBuilder.buildLShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
2336 
2337     auto Lo = MIRBuilder.buildSelect(
2338         HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
2339     auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
2340 
2341     ResultRegs[0] = Lo.getReg(0);
2342     ResultRegs[1] = Hi.getReg(0);
2343     break;
2344   }
2345   case TargetOpcode::G_ASHR: {
2346     // Short: ShAmt < NewBitSize
2347     auto HiS = MIRBuilder.buildAShr(HalfTy, InH, Amt);
2348 
2349     auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
2350     auto OrRHS = MIRBuilder.buildLShr(HalfTy, InH, AmtLack);
2351     auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
2352 
2353     // Long: ShAmt >= NewBitSize
2354 
2355     // Sign of Hi part.
2356     auto HiL = MIRBuilder.buildAShr(
2357         HalfTy, InH, MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1));
2358 
2359     auto LoL = MIRBuilder.buildAShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
2360 
2361     auto Lo = MIRBuilder.buildSelect(
2362         HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
2363 
2364     auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
2365 
2366     ResultRegs[0] = Lo.getReg(0);
2367     ResultRegs[1] = Hi.getReg(0);
2368     break;
2369   }
2370   default:
2371     llvm_unreachable("not a shift");
2372   }
2373 
2374   MIRBuilder.buildMerge(DstReg, ResultRegs);
2375   MI.eraseFromParent();
2376   return Legalized;
2377 }
2378 
2379 LegalizerHelper::LegalizeResult
2380 LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
2381                                     LLT MoreTy) {
2382   MIRBuilder.setInstr(MI);
2383   unsigned Opc = MI.getOpcode();
2384   switch (Opc) {
2385   case TargetOpcode::G_IMPLICIT_DEF: {
2386     Observer.changingInstr(MI);
2387     moreElementsVectorDst(MI, MoreTy, 0);
2388     Observer.changedInstr(MI);
2389     return Legalized;
2390   }
2391   case TargetOpcode::G_AND:
2392   case TargetOpcode::G_OR:
2393   case TargetOpcode::G_XOR: {
2394     Observer.changingInstr(MI);
2395     moreElementsVectorSrc(MI, MoreTy, 1);
2396     moreElementsVectorSrc(MI, MoreTy, 2);
2397     moreElementsVectorDst(MI, MoreTy, 0);
2398     Observer.changedInstr(MI);
2399     return Legalized;
2400   }
2401   case TargetOpcode::G_EXTRACT:
2402     if (TypeIdx != 1)
2403       return UnableToLegalize;
2404     Observer.changingInstr(MI);
2405     moreElementsVectorSrc(MI, MoreTy, 1);
2406     Observer.changedInstr(MI);
2407     return Legalized;
2408   case TargetOpcode::G_SELECT:
2409     if (TypeIdx != 0)
2410       return UnableToLegalize;
2411     if (MRI.getType(MI.getOperand(1).getReg()).isVector())
2412       return UnableToLegalize;
2413 
2414     Observer.changingInstr(MI);
2415     moreElementsVectorSrc(MI, MoreTy, 2);
2416     moreElementsVectorSrc(MI, MoreTy, 3);
2417     moreElementsVectorDst(MI, MoreTy, 0);
2418     Observer.changedInstr(MI);
2419     return Legalized;
2420   default:
2421     return UnableToLegalize;
2422   }
2423 }
2424 
2425 LegalizerHelper::LegalizeResult
2426 LegalizerHelper::narrowScalarMul(MachineInstr &MI, unsigned TypeIdx, LLT NewTy) {
2427   unsigned DstReg = MI.getOperand(0).getReg();
2428   unsigned Src0 = MI.getOperand(1).getReg();
2429   unsigned Src1 = MI.getOperand(2).getReg();
2430   LLT Ty = MRI.getType(DstReg);
2431   if (Ty.isVector())
2432     return UnableToLegalize;
2433 
2434   unsigned Size = Ty.getSizeInBits();
2435   unsigned NewSize = Size / 2;
2436   if (Size != 2 * NewSize)
2437     return UnableToLegalize;
2438 
2439   LLT HalfTy = LLT::scalar(NewSize);
2440   // TODO: if HalfTy != NewTy, handle the breakdown all at once?
2441 
2442   unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty);
2443   unsigned Lo = MRI.createGenericVirtualRegister(HalfTy);
2444   unsigned Hi = MRI.createGenericVirtualRegister(HalfTy);
2445   unsigned ExtLo = MRI.createGenericVirtualRegister(Ty);
2446   unsigned ExtHi = MRI.createGenericVirtualRegister(Ty);
2447   unsigned ShiftedHi = MRI.createGenericVirtualRegister(Ty);
2448 
2449   SmallVector<unsigned, 2> Src0Parts;
2450   SmallVector<unsigned, 2> Src1Parts;
2451 
2452   extractParts(Src0, HalfTy, 2, Src0Parts);
2453   extractParts(Src1, HalfTy, 2, Src1Parts);
2454 
2455   MIRBuilder.buildMul(Lo, Src0Parts[0], Src1Parts[0]);
2456 
2457   // TODO: Use smulh or umulh depending on what the target has.
2458   MIRBuilder.buildUMulH(Hi, Src0Parts[1], Src1Parts[1]);
2459 
2460   MIRBuilder.buildConstant(ShiftAmt, NewSize);
2461   MIRBuilder.buildAnyExt(ExtHi, Hi);
2462   MIRBuilder.buildShl(ShiftedHi, ExtHi, ShiftAmt);
2463 
2464   MIRBuilder.buildZExt(ExtLo, Lo);
2465   MIRBuilder.buildOr(DstReg, ExtLo, ShiftedHi);
2466   MI.eraseFromParent();
2467   return Legalized;
2468 }
2469 
2470 
2471 LegalizerHelper::LegalizeResult
2472 LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2473                                      LLT NarrowTy) {
2474   if (TypeIdx != 1)
2475     return UnableToLegalize;
2476 
2477   uint64_t NarrowSize = NarrowTy.getSizeInBits();
2478 
2479   int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
2480   // FIXME: add support for when SizeOp1 isn't an exact multiple of
2481   // NarrowSize.
2482   if (SizeOp1 % NarrowSize != 0)
2483     return UnableToLegalize;
2484   int NumParts = SizeOp1 / NarrowSize;
2485 
2486   SmallVector<unsigned, 2> SrcRegs, DstRegs;
2487   SmallVector<uint64_t, 2> Indexes;
2488   extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
2489 
2490   unsigned OpReg = MI.getOperand(0).getReg();
2491   uint64_t OpStart = MI.getOperand(2).getImm();
2492   uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
2493   for (int i = 0; i < NumParts; ++i) {
2494     unsigned SrcStart = i * NarrowSize;
2495 
2496     if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
2497       // No part of the extract uses this subregister, ignore it.
2498       continue;
2499     } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
2500       // The entire subregister is extracted, forward the value.
2501       DstRegs.push_back(SrcRegs[i]);
2502       continue;
2503     }
2504 
2505     // OpSegStart is where this destination segment would start in OpReg if it
2506     // extended infinitely in both directions.
2507     int64_t ExtractOffset;
2508     uint64_t SegSize;
2509     if (OpStart < SrcStart) {
2510       ExtractOffset = 0;
2511       SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
2512     } else {
2513       ExtractOffset = OpStart - SrcStart;
2514       SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
2515     }
2516 
2517     unsigned SegReg = SrcRegs[i];
2518     if (ExtractOffset != 0 || SegSize != NarrowSize) {
2519       // A genuine extract is needed.
2520       SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
2521       MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
2522     }
2523 
2524     DstRegs.push_back(SegReg);
2525   }
2526 
2527   unsigned DstReg = MI.getOperand(0).getReg();
2528   if(MRI.getType(DstReg).isVector())
2529     MIRBuilder.buildBuildVector(DstReg, DstRegs);
2530   else
2531     MIRBuilder.buildMerge(DstReg, DstRegs);
2532   MI.eraseFromParent();
2533   return Legalized;
2534 }
2535 
2536 LegalizerHelper::LegalizeResult
2537 LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2538                                     LLT NarrowTy) {
2539   // FIXME: Don't know how to handle secondary types yet.
2540   if (TypeIdx != 0)
2541     return UnableToLegalize;
2542 
2543   uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2544   uint64_t NarrowSize = NarrowTy.getSizeInBits();
2545 
2546   // FIXME: add support for when SizeOp0 isn't an exact multiple of
2547   // NarrowSize.
2548   if (SizeOp0 % NarrowSize != 0)
2549     return UnableToLegalize;
2550 
2551   int NumParts = SizeOp0 / NarrowSize;
2552 
2553   SmallVector<unsigned, 2> SrcRegs, DstRegs;
2554   SmallVector<uint64_t, 2> Indexes;
2555   extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
2556 
2557   unsigned OpReg = MI.getOperand(2).getReg();
2558   uint64_t OpStart = MI.getOperand(3).getImm();
2559   uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
2560   for (int i = 0; i < NumParts; ++i) {
2561     unsigned DstStart = i * NarrowSize;
2562 
2563     if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
2564       // No part of the insert affects this subregister, forward the original.
2565       DstRegs.push_back(SrcRegs[i]);
2566       continue;
2567     } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
2568       // The entire subregister is defined by this insert, forward the new
2569       // value.
2570       DstRegs.push_back(OpReg);
2571       continue;
2572     }
2573 
2574     // OpSegStart is where this destination segment would start in OpReg if it
2575     // extended infinitely in both directions.
2576     int64_t ExtractOffset, InsertOffset;
2577     uint64_t SegSize;
2578     if (OpStart < DstStart) {
2579       InsertOffset = 0;
2580       ExtractOffset = DstStart - OpStart;
2581       SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
2582     } else {
2583       InsertOffset = OpStart - DstStart;
2584       ExtractOffset = 0;
2585       SegSize =
2586         std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
2587     }
2588 
2589     unsigned SegReg = OpReg;
2590     if (ExtractOffset != 0 || SegSize != OpSize) {
2591       // A genuine extract is needed.
2592       SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
2593       MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
2594     }
2595 
2596     unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
2597     MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
2598     DstRegs.push_back(DstReg);
2599   }
2600 
2601   assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
2602   unsigned DstReg = MI.getOperand(0).getReg();
2603   if(MRI.getType(DstReg).isVector())
2604     MIRBuilder.buildBuildVector(DstReg, DstRegs);
2605   else
2606     MIRBuilder.buildMerge(DstReg, DstRegs);
2607   MI.eraseFromParent();
2608   return Legalized;
2609 }
2610 
2611 LegalizerHelper::LegalizeResult
2612 LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
2613                                     LLT NarrowTy) {
2614   if (TypeIdx != 0)
2615     return UnableToLegalize;
2616 
2617   unsigned CondReg = MI.getOperand(1).getReg();
2618   LLT CondTy = MRI.getType(CondReg);
2619   if (CondTy.isVector()) // TODO: Handle vselect
2620     return UnableToLegalize;
2621 
2622   unsigned DstReg = MI.getOperand(0).getReg();
2623   LLT DstTy = MRI.getType(DstReg);
2624 
2625   SmallVector<unsigned, 4> DstRegs, DstLeftoverRegs;
2626   SmallVector<unsigned, 4> Src1Regs, Src1LeftoverRegs;
2627   SmallVector<unsigned, 4> Src2Regs, Src2LeftoverRegs;
2628   LLT LeftoverTy;
2629   if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
2630                     Src1Regs, Src1LeftoverRegs))
2631     return UnableToLegalize;
2632 
2633   LLT Unused;
2634   if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
2635                     Src2Regs, Src2LeftoverRegs))
2636     llvm_unreachable("inconsistent extractParts result");
2637 
2638   for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
2639     auto Select = MIRBuilder.buildSelect(NarrowTy,
2640                                          CondReg, Src1Regs[I], Src2Regs[I]);
2641     DstRegs.push_back(Select->getOperand(0).getReg());
2642   }
2643 
2644   for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
2645     auto Select = MIRBuilder.buildSelect(
2646       LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
2647     DstLeftoverRegs.push_back(Select->getOperand(0).getReg());
2648   }
2649 
2650   insertParts(DstReg, DstTy, NarrowTy, DstRegs,
2651               LeftoverTy, DstLeftoverRegs);
2652 
2653   MI.eraseFromParent();
2654   return Legalized;
2655 }
2656 
2657 LegalizerHelper::LegalizeResult
2658 LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
2659   unsigned Opc = MI.getOpcode();
2660   auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
2661   auto isSupported = [this](const LegalityQuery &Q) {
2662     auto QAction = LI.getAction(Q).Action;
2663     return QAction == Legal || QAction == Libcall || QAction == Custom;
2664   };
2665   switch (Opc) {
2666   default:
2667     return UnableToLegalize;
2668   case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
2669     // This trivially expands to CTLZ.
2670     Observer.changingInstr(MI);
2671     MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
2672     Observer.changedInstr(MI);
2673     return Legalized;
2674   }
2675   case TargetOpcode::G_CTLZ: {
2676     unsigned SrcReg = MI.getOperand(1).getReg();
2677     unsigned Len = Ty.getSizeInBits();
2678     if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) {
2679       // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
2680       auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
2681                                              {Ty}, {SrcReg});
2682       auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
2683       auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
2684       auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
2685                                           SrcReg, MIBZero);
2686       MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
2687                              MIBCtlzZU);
2688       MI.eraseFromParent();
2689       return Legalized;
2690     }
2691     // for now, we do this:
2692     // NewLen = NextPowerOf2(Len);
2693     // x = x | (x >> 1);
2694     // x = x | (x >> 2);
2695     // ...
2696     // x = x | (x >>16);
2697     // x = x | (x >>32); // for 64-bit input
2698     // Upto NewLen/2
2699     // return Len - popcount(x);
2700     //
2701     // Ref: "Hacker's Delight" by Henry Warren
2702     unsigned Op = SrcReg;
2703     unsigned NewLen = PowerOf2Ceil(Len);
2704     for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
2705       auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
2706       auto MIBOp = MIRBuilder.buildInstr(
2707           TargetOpcode::G_OR, {Ty},
2708           {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty},
2709                                      {Op, MIBShiftAmt})});
2710       Op = MIBOp->getOperand(0).getReg();
2711     }
2712     auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op});
2713     MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
2714                           {MIRBuilder.buildConstant(Ty, Len), MIBPop});
2715     MI.eraseFromParent();
2716     return Legalized;
2717   }
2718   case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
2719     // This trivially expands to CTTZ.
2720     Observer.changingInstr(MI);
2721     MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
2722     Observer.changedInstr(MI);
2723     return Legalized;
2724   }
2725   case TargetOpcode::G_CTTZ: {
2726     unsigned SrcReg = MI.getOperand(1).getReg();
2727     unsigned Len = Ty.getSizeInBits();
2728     if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) {
2729       // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
2730       // zero.
2731       auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
2732                                              {Ty}, {SrcReg});
2733       auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
2734       auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
2735       auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
2736                                           SrcReg, MIBZero);
2737       MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
2738                              MIBCttzZU);
2739       MI.eraseFromParent();
2740       return Legalized;
2741     }
2742     // for now, we use: { return popcount(~x & (x - 1)); }
2743     // unless the target has ctlz but not ctpop, in which case we use:
2744     // { return 32 - nlz(~x & (x-1)); }
2745     // Ref: "Hacker's Delight" by Henry Warren
2746     auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
2747     auto MIBNot =
2748         MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1});
2749     auto MIBTmp = MIRBuilder.buildInstr(
2750         TargetOpcode::G_AND, {Ty},
2751         {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
2752                                        {SrcReg, MIBCstNeg1})});
2753     if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) &&
2754         isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) {
2755       auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
2756       MIRBuilder.buildInstr(
2757           TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
2758           {MIBCstLen,
2759            MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})});
2760       MI.eraseFromParent();
2761       return Legalized;
2762     }
2763     MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
2764     MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
2765     return Legalized;
2766   }
2767   }
2768 }
2769