1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file implements the LegalizerHelper class to legalize
10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
11 /// primary legalization.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
16 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
17 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
18 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/CodeGen/TargetFrameLowering.h"
21 #include "llvm/CodeGen/TargetInstrInfo.h"
22 #include "llvm/CodeGen/TargetLowering.h"
23 #include "llvm/CodeGen/TargetSubtargetInfo.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
27 
28 #define DEBUG_TYPE "legalizer"
29 
30 using namespace llvm;
31 using namespace LegalizeActions;
32 
33 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
34 ///
35 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
36 /// with any leftover piece as type \p LeftoverTy
37 ///
38 /// Returns -1 in the first element of the pair if the breakdown is not
39 /// satisfiable.
40 static std::pair<int, int>
41 getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
42   assert(!LeftoverTy.isValid() && "this is an out argument");
43 
44   unsigned Size = OrigTy.getSizeInBits();
45   unsigned NarrowSize = NarrowTy.getSizeInBits();
46   unsigned NumParts = Size / NarrowSize;
47   unsigned LeftoverSize = Size - NumParts * NarrowSize;
48   assert(Size > NarrowSize);
49 
50   if (LeftoverSize == 0)
51     return {NumParts, 0};
52 
53   if (NarrowTy.isVector()) {
54     unsigned EltSize = OrigTy.getScalarSizeInBits();
55     if (LeftoverSize % EltSize != 0)
56       return {-1, -1};
57     LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
58   } else {
59     LeftoverTy = LLT::scalar(LeftoverSize);
60   }
61 
62   int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
63   return std::make_pair(NumParts, NumLeftover);
64 }
65 
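/// Return the IEEE floating-point IR type matching the width of the scalar
/// type \p Ty (half, float, double or fp128), or nullptr if \p Ty is not a
/// scalar of one of those widths.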
66 static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
68   if (!Ty.isScalar())
69     return nullptr;
70 
71   switch (Ty.getSizeInBits()) {
72   case 16:
73     return Type::getHalfTy(Ctx);
74   case 32:
75     return Type::getFloatTy(Ctx);
76   case 64:
77     return Type::getDoubleTy(Ctx);
78   case 128:
79     return Type::getFP128Ty(Ctx);
80   default:
81     return nullptr;
82   }
83 }
84 
85 LegalizerHelper::LegalizerHelper(MachineFunction &MF,
86                                  GISelChangeObserver &Observer,
87                                  MachineIRBuilder &Builder)
88     : MIRBuilder(Builder), MRI(MF.getRegInfo()),
89       LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
90   MIRBuilder.setMF(MF);
91   MIRBuilder.setChangeObserver(Observer);
92 }
93 
94 LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
95                                  GISelChangeObserver &Observer,
96                                  MachineIRBuilder &B)
97     : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
98   MIRBuilder.setMF(MF);
99   MIRBuilder.setChangeObserver(Observer);
}

101 LegalizerHelper::LegalizeResult
102 LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
103   LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));
104 
105   if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
106       MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
107     return LI.legalizeIntrinsic(MI, MIRBuilder, Observer) ? Legalized
108                                                           : UnableToLegalize;
109   auto Step = LI.getAction(MI, MRI);
110   switch (Step.Action) {
111   case Legal:
112     LLVM_DEBUG(dbgs() << ".. Already legal\n");
113     return AlreadyLegal;
114   case Libcall:
115     LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
116     return libcall(MI);
117   case NarrowScalar:
118     LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
119     return narrowScalar(MI, Step.TypeIdx, Step.NewType);
120   case WidenScalar:
121     LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
122     return widenScalar(MI, Step.TypeIdx, Step.NewType);
123   case Bitcast:
124     LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
125     return bitcast(MI, Step.TypeIdx, Step.NewType);
126   case Lower:
127     LLVM_DEBUG(dbgs() << ".. Lower\n");
128     return lower(MI, Step.TypeIdx, Step.NewType);
129   case FewerElements:
130     LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
131     return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
132   case MoreElements:
133     LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
134     return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
135   case Custom:
136     LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
137     return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
138                                                             : UnableToLegalize;
139   default:
140     LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
141     return UnableToLegalize;
142   }
143 }
144 
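/// Split \p Reg into \p NumParts registers of type \p Ty using a single
/// G_UNMERGE_VALUES, appending the new registers to \p VRegs.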
145 void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
146                                    SmallVectorImpl<Register> &VRegs) {
147   for (int i = 0; i < NumParts; ++i)
148     VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
149   MIRBuilder.buildUnmerge(VRegs, Reg);
150 }
151 
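/// Version of extractParts that also handles sizes that don't divide evenly:
/// split \p Reg into as many \p MainTy pieces as fit (returned in \p VRegs)
/// and place the remaining bits in \p LeftoverRegs as \p LeftoverTy pieces.
/// Returns false if no suitable leftover type exists.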
152 bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
153                                    LLT MainTy, LLT &LeftoverTy,
154                                    SmallVectorImpl<Register> &VRegs,
155                                    SmallVectorImpl<Register> &LeftoverRegs) {
156   assert(!LeftoverTy.isValid() && "this is an out argument");
157 
158   unsigned RegSize = RegTy.getSizeInBits();
159   unsigned MainSize = MainTy.getSizeInBits();
160   unsigned NumParts = RegSize / MainSize;
161   unsigned LeftoverSize = RegSize - NumParts * MainSize;
162 
163   // Use an unmerge when possible.
164   if (LeftoverSize == 0) {
165     for (unsigned I = 0; I < NumParts; ++I)
166       VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
167     MIRBuilder.buildUnmerge(VRegs, Reg);
168     return true;
169   }
170 
171   if (MainTy.isVector()) {
172     unsigned EltSize = MainTy.getScalarSizeInBits();
173     if (LeftoverSize % EltSize != 0)
174       return false;
175     LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
176   } else {
177     LeftoverTy = LLT::scalar(LeftoverSize);
178   }
179 
180   // For irregular sizes, extract the individual parts.
181   for (unsigned I = 0; I != NumParts; ++I) {
182     Register NewReg = MRI.createGenericVirtualRegister(MainTy);
183     VRegs.push_back(NewReg);
184     MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
185   }
186 
187   for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
188        Offset += LeftoverSize) {
189     Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
190     LeftoverRegs.push_back(NewReg);
191     MIRBuilder.buildExtract(NewReg, Reg, Offset);
192   }
193 
194   return true;
195 }
196 
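/// Inverse of extractParts: reassemble \p DstReg of type \p ResultTy from the
/// \p PartTy registers in \p PartRegs plus any \p LeftoverTy registers in
/// \p LeftoverRegs, using merges for even splits and a chain of G_INSERTs
/// otherwise.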
197 void LegalizerHelper::insertParts(Register DstReg,
198                                   LLT ResultTy, LLT PartTy,
199                                   ArrayRef<Register> PartRegs,
200                                   LLT LeftoverTy,
201                                   ArrayRef<Register> LeftoverRegs) {
202   if (!LeftoverTy.isValid()) {
203     assert(LeftoverRegs.empty());
204 
205     if (!ResultTy.isVector()) {
206       MIRBuilder.buildMerge(DstReg, PartRegs);
207       return;
208     }
209 
210     if (PartTy.isVector())
211       MIRBuilder.buildConcatVectors(DstReg, PartRegs);
212     else
213       MIRBuilder.buildBuildVector(DstReg, PartRegs);
214     return;
215   }
216 
217   unsigned PartSize = PartTy.getSizeInBits();
218   unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();
219 
220   Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
221   MIRBuilder.buildUndef(CurResultReg);
222 
223   unsigned Offset = 0;
224   for (Register PartReg : PartRegs) {
225     Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
226     MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
227     CurResultReg = NewResultReg;
228     Offset += PartSize;
229   }
230 
231   for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
232     // Use the original output register for the final insert to avoid a copy.
233     Register NewResultReg = (I + 1 == E) ?
234       DstReg : MRI.createGenericVirtualRegister(ResultTy);
235 
236     MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
237     CurResultReg = NewResultReg;
238     Offset += LeftoverPartSize;
239   }
240 }
241 
242 /// Return the result registers of G_UNMERGE_VALUES \p MI in \p Regs
243 static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
244                               const MachineInstr &MI) {
245   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
246 
247   const int NumResults = MI.getNumOperands() - 1;
248   Regs.resize(NumResults);
249   for (int I = 0; I != NumResults; ++I)
250     Regs[I] = MI.getOperand(I).getReg();
251 }
252 
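/// Unmerge \p SrcReg into pieces of the greatest common divisor type of
/// \p DstTy, \p NarrowTy and the source type, placing the pieces in \p Parts.
/// Returns the GCD type used.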
253 LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
254                                     LLT NarrowTy, Register SrcReg) {
255   LLT SrcTy = MRI.getType(SrcReg);
256 
257   LLT GCDTy = getGCDType(DstTy, getGCDType(SrcTy, NarrowTy));
258   if (SrcTy == GCDTy) {
259     // If the source already evenly divides the result type, we don't need to do
260     // anything.
261     Parts.push_back(SrcReg);
262   } else {
263     // Need to split into common type sized pieces.
264     auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
265     getUnmergeResults(Parts, *Unmerge);
266   }
267 
268   return GCDTy;
269 }
270 
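/// Combine the \p GCDTy pieces in \p VRegs into \p NarrowTy values covering
/// the least common multiple type of \p DstTy and \p NarrowTy, padding with
/// \p PadStrategy (G_ANYEXT, G_ZEXT or G_SEXT) when the sources run out.
/// On return \p VRegs holds the new pieces; the LCM type is returned.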
271 LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
272                                          SmallVectorImpl<Register> &VRegs,
273                                          unsigned PadStrategy) {
274   LLT LCMTy = getLCMType(DstTy, NarrowTy);
275 
276   int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
277   int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
278   int NumOrigSrc = VRegs.size();
279 
280   Register PadReg;
281 
282   // Get a value we can use to pad the source value if the sources won't evenly
283   // cover the result type.
284   if (NumOrigSrc < NumParts * NumSubParts) {
285     if (PadStrategy == TargetOpcode::G_ZEXT)
286       PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
287     else if (PadStrategy == TargetOpcode::G_ANYEXT)
288       PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
289     else {
290       assert(PadStrategy == TargetOpcode::G_SEXT);
291 
292       // Shift the sign bit of the low register through the high register.
293       auto ShiftAmt =
294         MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
295       PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
296     }
297   }
298 
299   // Registers for the final merge to be produced.
300   SmallVector<Register, 4> Remerge(NumParts);
301 
302   // Registers needed for intermediate merges, which will be merged into a
303   // source for Remerge.
304   SmallVector<Register, 4> SubMerge(NumSubParts);
305 
306   // Once we've fully read off the end of the original source bits, we can reuse
307   // the same high bits for remaining padding elements.
308   Register AllPadReg;
309 
310   // Build merges to the LCM type to cover the original result type.
311   for (int I = 0; I != NumParts; ++I) {
312     bool AllMergePartsArePadding = true;
313 
314     // Build the requested merges to the requested type.
315     for (int J = 0; J != NumSubParts; ++J) {
316       int Idx = I * NumSubParts + J;
317       if (Idx >= NumOrigSrc) {
318         SubMerge[J] = PadReg;
319         continue;
320       }
321 
322       SubMerge[J] = VRegs[Idx];
323 
324       // There are meaningful bits here we can't reuse later.
325       AllMergePartsArePadding = false;
326     }
327 
328     // If we've filled up a complete piece with padding bits, we can directly
329     // emit the natural sized constant if applicable, rather than a merge of
330     // smaller constants.
331     if (AllMergePartsArePadding && !AllPadReg) {
332       if (PadStrategy == TargetOpcode::G_ANYEXT)
333         AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
334       else if (PadStrategy == TargetOpcode::G_ZEXT)
335         AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
336 
337       // If this is a sign extension, we can't materialize a trivial constant
338       // with the right type and have to produce a merge.
339     }
340 
341     if (AllPadReg) {
342       // Avoid creating additional instructions if we're just adding additional
343       // copies of padding bits.
344       Remerge[I] = AllPadReg;
345       continue;
346     }
347 
348     if (NumSubParts == 1)
349       Remerge[I] = SubMerge[0];
350     else
351       Remerge[I] = MIRBuilder.buildMerge(NarrowTy, SubMerge).getReg(0);
352 
353     // In the sign extend padding case, re-use the first all-signbit merge.
354     if (AllMergePartsArePadding && !AllPadReg)
355       AllPadReg = Remerge[I];
356   }
357 
358   VRegs = std::move(Remerge);
359   return LCMTy;
360 }
361 
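/// Merge \p RemergeRegs into \p LCMTy and, if the destination is narrower,
/// truncate or extract the low bits to produce \p DstReg.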
362 void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
363                                                ArrayRef<Register> RemergeRegs) {
364   LLT DstTy = MRI.getType(DstReg);
365 
366   // Create the merge to the widened source, and extract the relevant bits into
367   // the result.
368 
369   if (DstTy == LCMTy) {
370     MIRBuilder.buildMerge(DstReg, RemergeRegs);
371     return;
372   }
373 
374   auto Remerge = MIRBuilder.buildMerge(LCMTy, RemergeRegs);
375   if (DstTy.isScalar() && LCMTy.isScalar()) {
376     MIRBuilder.buildTrunc(DstReg, Remerge);
377     return;
378   }
379 
380   if (LCMTy.isVector()) {
381     MIRBuilder.buildExtract(DstReg, Remerge, 0);
382     return;
383   }
384 
385   llvm_unreachable("unhandled case");
386 }
387 
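/// Map a generic opcode and scalar size in bits to the corresponding runtime
/// library call.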
388 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
389 #define RTLIBCASE(LibcallPrefix)                                               \
390   do {                                                                         \
391     switch (Size) {                                                            \
392     case 32:                                                                   \
393       return RTLIB::LibcallPrefix##32;                                         \
394     case 64:                                                                   \
395       return RTLIB::LibcallPrefix##64;                                         \
396     case 128:                                                                  \
397       return RTLIB::LibcallPrefix##128;                                        \
398     default:                                                                   \
399       llvm_unreachable("unexpected size");                                     \
400     }                                                                          \
401   } while (0)
402 
403   assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
404 
405   switch (Opcode) {
406   case TargetOpcode::G_SDIV:
407     RTLIBCASE(SDIV_I);
408   case TargetOpcode::G_UDIV:
409     RTLIBCASE(UDIV_I);
410   case TargetOpcode::G_SREM:
411     RTLIBCASE(SREM_I);
412   case TargetOpcode::G_UREM:
413     RTLIBCASE(UREM_I);
414   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
415     RTLIBCASE(CTLZ_I);
416   case TargetOpcode::G_FADD:
417     RTLIBCASE(ADD_F);
418   case TargetOpcode::G_FSUB:
419     RTLIBCASE(SUB_F);
420   case TargetOpcode::G_FMUL:
421     RTLIBCASE(MUL_F);
422   case TargetOpcode::G_FDIV:
423     RTLIBCASE(DIV_F);
424   case TargetOpcode::G_FEXP:
425     RTLIBCASE(EXP_F);
426   case TargetOpcode::G_FEXP2:
427     RTLIBCASE(EXP2_F);
428   case TargetOpcode::G_FREM:
429     RTLIBCASE(REM_F);
430   case TargetOpcode::G_FPOW:
431     RTLIBCASE(POW_F);
432   case TargetOpcode::G_FMA:
433     RTLIBCASE(FMA_F);
434   case TargetOpcode::G_FSIN:
435     RTLIBCASE(SIN_F);
436   case TargetOpcode::G_FCOS:
437     RTLIBCASE(COS_F);
438   case TargetOpcode::G_FLOG10:
439     RTLIBCASE(LOG10_F);
440   case TargetOpcode::G_FLOG:
441     RTLIBCASE(LOG_F);
442   case TargetOpcode::G_FLOG2:
443     RTLIBCASE(LOG2_F);
444   case TargetOpcode::G_FCEIL:
445     RTLIBCASE(CEIL_F);
446   case TargetOpcode::G_FFLOOR:
447     RTLIBCASE(FLOOR_F);
448   case TargetOpcode::G_FMINNUM:
449     RTLIBCASE(FMIN_F);
450   case TargetOpcode::G_FMAXNUM:
451     RTLIBCASE(FMAX_F);
452   case TargetOpcode::G_FSQRT:
453     RTLIBCASE(SQRT_F);
454   case TargetOpcode::G_FRINT:
455     RTLIBCASE(RINT_F);
456   case TargetOpcode::G_FNEARBYINT:
457     RTLIBCASE(NEARBYINT_F);
458   }
459   llvm_unreachable("Unknown libcall function");
460 }
461 
462 /// True if an instruction is in tail position in its caller. Intended for
463 /// legalizing libcalls as tail calls when possible.
464 static bool isLibCallInTailPosition(MachineInstr &MI) {
465   MachineBasicBlock &MBB = *MI.getParent();
466   const Function &F = MBB.getParent()->getFunction();
467 
468   // Conservatively require the attributes of the call to match those of
469   // the return. Ignore NoAlias and NonNull because they don't affect the
470   // call sequence.
471   AttributeList CallerAttrs = F.getAttributes();
472   if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
473           .removeAttribute(Attribute::NoAlias)
474           .removeAttribute(Attribute::NonNull)
475           .hasAttributes())
476     return false;
477 
478   // It's not safe to eliminate the sign / zero extension of the return value.
479   if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
480       CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
481     return false;
482 
483   // Only tail call if the following instruction is a standard return.
484   auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
485   auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
486   if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
487     return false;
488 
489   return true;
490 }
491 
492 LegalizerHelper::LegalizeResult
493 llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
494                     const CallLowering::ArgInfo &Result,
495                     ArrayRef<CallLowering::ArgInfo> Args,
496                     const CallingConv::ID CC) {
497   auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
498 
499   CallLowering::CallLoweringInfo Info;
500   Info.CallConv = CC;
501   Info.Callee = MachineOperand::CreateES(Name);
502   Info.OrigRet = Result;
503   std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
504   if (!CLI.lowerCall(MIRBuilder, Info))
505     return LegalizerHelper::UnableToLegalize;
506 
507   return LegalizerHelper::Legalized;
508 }
509 
510 LegalizerHelper::LegalizeResult
511 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
512                     const CallLowering::ArgInfo &Result,
513                     ArrayRef<CallLowering::ArgInfo> Args) {
514   auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
515   const char *Name = TLI.getLibcallName(Libcall);
516   const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
517   return createLibcall(MIRBuilder, Name, Result, Args, CC);
518 }
519 
520 // Useful for libcalls where all operands have the same type.
521 static LegalizerHelper::LegalizeResult
522 simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
523               Type *OpType) {
524   auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
525 
526   SmallVector<CallLowering::ArgInfo, 3> Args;
527   for (unsigned i = 1; i < MI.getNumOperands(); i++)
528     Args.push_back({MI.getOperand(i).getReg(), OpType});
529   return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
530                        Args);
531 }
532 
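/// Lower a G_INTRINSIC_W_SIDE_EFFECTS memcpy/memmove/memset to a call to the
/// corresponding runtime library function, emitted as a tail call when the
/// intrinsic's 'tail' immediate is set and the call is in tail position.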
533 LegalizerHelper::LegalizeResult
534 llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
535                        MachineInstr &MI) {
536   assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
537   auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
538 
539   SmallVector<CallLowering::ArgInfo, 3> Args;
540   // Add all the args, except for the last which is an imm denoting 'tail'.
541   for (unsigned i = 1; i < MI.getNumOperands() - 1; i++) {
542     Register Reg = MI.getOperand(i).getReg();
543 
    // Need to derive an IR type for call lowering.
545     LLT OpLLT = MRI.getType(Reg);
546     Type *OpTy = nullptr;
547     if (OpLLT.isPointer())
548       OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
549     else
550       OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
551     Args.push_back({Reg, OpTy});
552   }
553 
554   auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
555   auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
556   Intrinsic::ID ID = MI.getOperand(0).getIntrinsicID();
557   RTLIB::Libcall RTLibcall;
558   switch (ID) {
559   case Intrinsic::memcpy:
560     RTLibcall = RTLIB::MEMCPY;
561     break;
562   case Intrinsic::memset:
563     RTLibcall = RTLIB::MEMSET;
564     break;
565   case Intrinsic::memmove:
566     RTLibcall = RTLIB::MEMMOVE;
567     break;
568   default:
569     return LegalizerHelper::UnableToLegalize;
570   }
571   const char *Name = TLI.getLibcallName(RTLibcall);
572 
573   MIRBuilder.setInstrAndDebugLoc(MI);
574 
575   CallLowering::CallLoweringInfo Info;
576   Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
577   Info.Callee = MachineOperand::CreateES(Name);
578   Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
579   Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() == 1 &&
580                     isLibCallInTailPosition(MI);
581 
582   std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
583   if (!CLI.lowerCall(MIRBuilder, Info))
584     return LegalizerHelper::UnableToLegalize;
585 
586   if (Info.LoweredTailCall) {
587     assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
588     // We must have a return following the call (or debug insts) to get past
589     // isLibCallInTailPosition.
590     do {
591       MachineInstr *Next = MI.getNextNode();
592       assert(Next && (Next->isReturn() || Next->isDebugInstr()) &&
593              "Expected instr following MI to be return or debug inst?");
594       // We lowered a tail call, so the call is now the return from the block.
595       // Delete the old return.
596       Next->eraseFromParent();
597     } while (MI.getNextNode());
598   }
599 
600   return LegalizerHelper::Legalized;
601 }
602 
603 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
604                                        Type *FromType) {
605   auto ToMVT = MVT::getVT(ToType);
606   auto FromMVT = MVT::getVT(FromType);
607 
608   switch (Opcode) {
609   case TargetOpcode::G_FPEXT:
610     return RTLIB::getFPEXT(FromMVT, ToMVT);
611   case TargetOpcode::G_FPTRUNC:
612     return RTLIB::getFPROUND(FromMVT, ToMVT);
613   case TargetOpcode::G_FPTOSI:
614     return RTLIB::getFPTOSINT(FromMVT, ToMVT);
615   case TargetOpcode::G_FPTOUI:
616     return RTLIB::getFPTOUINT(FromMVT, ToMVT);
617   case TargetOpcode::G_SITOFP:
618     return RTLIB::getSINTTOFP(FromMVT, ToMVT);
619   case TargetOpcode::G_UITOFP:
620     return RTLIB::getUINTTOFP(FromMVT, ToMVT);
621   }
622   llvm_unreachable("Unsupported libcall function");
623 }
624 
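/// Emit the libcall converting the source operand of \p MI from \p FromType
/// to a result of type \p ToType.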
625 static LegalizerHelper::LegalizeResult
626 conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
627                   Type *FromType) {
628   RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
629   return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
630                        {{MI.getOperand(1).getReg(), FromType}});
631 }
632 
633 LegalizerHelper::LegalizeResult
634 LegalizerHelper::libcall(MachineInstr &MI) {
635   LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
636   unsigned Size = LLTy.getSizeInBits();
637   auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
638 
639   MIRBuilder.setInstrAndDebugLoc(MI);
640 
641   switch (MI.getOpcode()) {
642   default:
643     return UnableToLegalize;
644   case TargetOpcode::G_SDIV:
645   case TargetOpcode::G_UDIV:
646   case TargetOpcode::G_SREM:
647   case TargetOpcode::G_UREM:
648   case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
649     Type *HLTy = IntegerType::get(Ctx, Size);
650     auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
651     if (Status != Legalized)
652       return Status;
653     break;
654   }
655   case TargetOpcode::G_FADD:
656   case TargetOpcode::G_FSUB:
657   case TargetOpcode::G_FMUL:
658   case TargetOpcode::G_FDIV:
659   case TargetOpcode::G_FMA:
660   case TargetOpcode::G_FPOW:
661   case TargetOpcode::G_FREM:
662   case TargetOpcode::G_FCOS:
663   case TargetOpcode::G_FSIN:
664   case TargetOpcode::G_FLOG10:
665   case TargetOpcode::G_FLOG:
666   case TargetOpcode::G_FLOG2:
667   case TargetOpcode::G_FEXP:
668   case TargetOpcode::G_FEXP2:
669   case TargetOpcode::G_FCEIL:
670   case TargetOpcode::G_FFLOOR:
671   case TargetOpcode::G_FMINNUM:
672   case TargetOpcode::G_FMAXNUM:
673   case TargetOpcode::G_FSQRT:
674   case TargetOpcode::G_FRINT:
675   case TargetOpcode::G_FNEARBYINT: {
676     Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
677     if (!HLTy || (Size != 32 && Size != 64 && Size != 128)) {
678       LLVM_DEBUG(dbgs() << "No libcall available for size " << Size << ".\n");
679       return UnableToLegalize;
680     }
681     auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
682     if (Status != Legalized)
683       return Status;
684     break;
685   }
686   case TargetOpcode::G_FPEXT:
687   case TargetOpcode::G_FPTRUNC: {
    Type *FromTy =
        getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
    Type *ToTy =
        getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
690     if (!FromTy || !ToTy)
691       return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy);
693     if (Status != Legalized)
694       return Status;
695     break;
696   }
697   case TargetOpcode::G_FPTOSI:
698   case TargetOpcode::G_FPTOUI: {
699     // FIXME: Support other types
700     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
701     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
702     if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
703       return UnableToLegalize;
704     LegalizeResult Status = conversionLibcall(
705         MI, MIRBuilder,
706         ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
707         FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
708     if (Status != Legalized)
709       return Status;
710     break;
711   }
712   case TargetOpcode::G_SITOFP:
713   case TargetOpcode::G_UITOFP: {
714     // FIXME: Support other types
715     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
716     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
717     if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
718       return UnableToLegalize;
719     LegalizeResult Status = conversionLibcall(
720         MI, MIRBuilder,
721         ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
722         FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
723     if (Status != Legalized)
724       return Status;
725     break;
726   }
727   }
728 
729   MI.eraseFromParent();
730   return Legalized;
731 }
732 
733 LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
734                                                               unsigned TypeIdx,
735                                                               LLT NarrowTy) {
736   MIRBuilder.setInstrAndDebugLoc(MI);
737 
738   uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
739   uint64_t NarrowSize = NarrowTy.getSizeInBits();
740 
741   switch (MI.getOpcode()) {
742   default:
743     return UnableToLegalize;
744   case TargetOpcode::G_IMPLICIT_DEF: {
745     Register DstReg = MI.getOperand(0).getReg();
746     LLT DstTy = MRI.getType(DstReg);
747 
748     // If SizeOp0 is not an exact multiple of NarrowSize, emit
749     // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
750     // FIXME: Although this would also be legal for the general case, it causes
751     //  a lot of regressions in the emitted code (superfluous COPYs, artifact
752     //  combines not being hit). This seems to be a problem related to the
753     //  artifact combiner.
754     if (SizeOp0 % NarrowSize != 0) {
755       LLT ImplicitTy = NarrowTy;
756       if (DstTy.isVector())
757         ImplicitTy = LLT::vector(DstTy.getNumElements(), ImplicitTy);
758 
759       Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
760       MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
761 
762       MI.eraseFromParent();
763       return Legalized;
764     }
765 
766     int NumParts = SizeOp0 / NarrowSize;
767 
768     SmallVector<Register, 2> DstRegs;
769     for (int i = 0; i < NumParts; ++i)
770       DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
771 
772     if (DstTy.isVector())
773       MIRBuilder.buildBuildVector(DstReg, DstRegs);
774     else
775       MIRBuilder.buildMerge(DstReg, DstRegs);
776     MI.eraseFromParent();
777     return Legalized;
778   }
779   case TargetOpcode::G_CONSTANT: {
780     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
781     const APInt &Val = MI.getOperand(1).getCImm()->getValue();
782     unsigned TotalSize = Ty.getSizeInBits();
783     unsigned NarrowSize = NarrowTy.getSizeInBits();
784     int NumParts = TotalSize / NarrowSize;
785 
786     SmallVector<Register, 4> PartRegs;
787     for (int I = 0; I != NumParts; ++I) {
788       unsigned Offset = I * NarrowSize;
789       auto K = MIRBuilder.buildConstant(NarrowTy,
790                                         Val.lshr(Offset).trunc(NarrowSize));
791       PartRegs.push_back(K.getReg(0));
792     }
793 
794     LLT LeftoverTy;
795     unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
796     SmallVector<Register, 1> LeftoverRegs;
797     if (LeftoverBits != 0) {
798       LeftoverTy = LLT::scalar(LeftoverBits);
799       auto K = MIRBuilder.buildConstant(
800         LeftoverTy,
801         Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
802       LeftoverRegs.push_back(K.getReg(0));
803     }
804 
805     insertParts(MI.getOperand(0).getReg(),
806                 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
807 
808     MI.eraseFromParent();
809     return Legalized;
810   }
811   case TargetOpcode::G_SEXT:
812   case TargetOpcode::G_ZEXT:
813   case TargetOpcode::G_ANYEXT:
814     return narrowScalarExt(MI, TypeIdx, NarrowTy);
815   case TargetOpcode::G_TRUNC: {
816     if (TypeIdx != 1)
817       return UnableToLegalize;
818 
819     uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
820     if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
821       LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
822       return UnableToLegalize;
823     }
824 
825     auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
826     MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
827     MI.eraseFromParent();
828     return Legalized;
829   }
830 
831   case TargetOpcode::G_FREEZE:
832     return reduceOperationWidth(MI, TypeIdx, NarrowTy);
833 
834   case TargetOpcode::G_ADD: {
835     // FIXME: add support for when SizeOp0 isn't an exact multiple of
836     // NarrowSize.
837     if (SizeOp0 % NarrowSize != 0)
838       return UnableToLegalize;
    // Expand in terms of carry-setting/consuming G_UADDO/G_UADDE instructions.
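    //
    // Illustrative example (register names made up), narrowing an s64 add
    // with NarrowTy = s32:
    //   %lo:_(s32), %c1:_(s1) = G_UADDO %lhs_lo, %rhs_lo
    //   %hi:_(s32), %c2:_(s1) = G_UADDE %lhs_hi, %rhs_hi, %c1
    //   %sum:_(s64) = G_MERGE_VALUES %lo:_(s32), %hi:_(s32)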
840     int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
841 
842     SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
843     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
844     extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
845 
846     Register CarryIn;
847     for (int i = 0; i < NumParts; ++i) {
848       Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
849       Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
850 
851       if (i == 0)
852         MIRBuilder.buildUAddo(DstReg, CarryOut, Src1Regs[i], Src2Regs[i]);
853       else {
854         MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
855                               Src2Regs[i], CarryIn);
856       }
857 
858       DstRegs.push_back(DstReg);
859       CarryIn = CarryOut;
860     }
861     Register DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
863       MIRBuilder.buildBuildVector(DstReg, DstRegs);
864     else
865       MIRBuilder.buildMerge(DstReg, DstRegs);
866     MI.eraseFromParent();
867     return Legalized;
868   }
869   case TargetOpcode::G_SUB: {
870     // FIXME: add support for when SizeOp0 isn't an exact multiple of
871     // NarrowSize.
872     if (SizeOp0 % NarrowSize != 0)
873       return UnableToLegalize;
874 
875     int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
876 
877     SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
878     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
879     extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
880 
881     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
882     Register BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
883     MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut},
884                           {Src1Regs[0], Src2Regs[0]});
885     DstRegs.push_back(DstReg);
886     Register BorrowIn = BorrowOut;
887     for (int i = 1; i < NumParts; ++i) {
888       DstReg = MRI.createGenericVirtualRegister(NarrowTy);
889       BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
890 
891       MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut},
892                             {Src1Regs[i], Src2Regs[i], BorrowIn});
893 
894       DstRegs.push_back(DstReg);
895       BorrowIn = BorrowOut;
896     }
897     MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
898     MI.eraseFromParent();
899     return Legalized;
900   }
901   case TargetOpcode::G_MUL:
902   case TargetOpcode::G_UMULH:
903     return narrowScalarMul(MI, NarrowTy);
904   case TargetOpcode::G_EXTRACT:
905     return narrowScalarExtract(MI, TypeIdx, NarrowTy);
906   case TargetOpcode::G_INSERT:
907     return narrowScalarInsert(MI, TypeIdx, NarrowTy);
908   case TargetOpcode::G_LOAD: {
909     const auto &MMO = **MI.memoperands_begin();
910     Register DstReg = MI.getOperand(0).getReg();
911     LLT DstTy = MRI.getType(DstReg);
912     if (DstTy.isVector())
913       return UnableToLegalize;
914 
915     if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
916       Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
917       auto &MMO = **MI.memoperands_begin();
918       MIRBuilder.buildLoad(TmpReg, MI.getOperand(1), MMO);
919       MIRBuilder.buildAnyExt(DstReg, TmpReg);
920       MI.eraseFromParent();
921       return Legalized;
922     }
923 
924     return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
925   }
926   case TargetOpcode::G_ZEXTLOAD:
927   case TargetOpcode::G_SEXTLOAD: {
928     bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
929     Register DstReg = MI.getOperand(0).getReg();
930     Register PtrReg = MI.getOperand(1).getReg();
931 
932     Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
933     auto &MMO = **MI.memoperands_begin();
934     if (MMO.getSizeInBits() == NarrowSize) {
935       MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
936     } else {
937       MIRBuilder.buildLoadInstr(MI.getOpcode(), TmpReg, PtrReg, MMO);
938     }
939 
940     if (ZExt)
941       MIRBuilder.buildZExt(DstReg, TmpReg);
942     else
943       MIRBuilder.buildSExt(DstReg, TmpReg);
944 
945     MI.eraseFromParent();
946     return Legalized;
947   }
948   case TargetOpcode::G_STORE: {
949     const auto &MMO = **MI.memoperands_begin();
950 
951     Register SrcReg = MI.getOperand(0).getReg();
952     LLT SrcTy = MRI.getType(SrcReg);
953     if (SrcTy.isVector())
954       return UnableToLegalize;
955 
956     int NumParts = SizeOp0 / NarrowSize;
957     unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
958     unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
959     if (SrcTy.isVector() && LeftoverBits != 0)
960       return UnableToLegalize;
961 
962     if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
963       Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
964       auto &MMO = **MI.memoperands_begin();
965       MIRBuilder.buildTrunc(TmpReg, SrcReg);
966       MIRBuilder.buildStore(TmpReg, MI.getOperand(1), MMO);
967       MI.eraseFromParent();
968       return Legalized;
969     }
970 
971     return reduceLoadStoreWidth(MI, 0, NarrowTy);
972   }
973   case TargetOpcode::G_SELECT:
974     return narrowScalarSelect(MI, TypeIdx, NarrowTy);
975   case TargetOpcode::G_AND:
976   case TargetOpcode::G_OR:
977   case TargetOpcode::G_XOR: {
978     // Legalize bitwise operation:
979     // A = BinOp<Ty> B, C
980     // into:
981     // B1, ..., BN = G_UNMERGE_VALUES B
982     // C1, ..., CN = G_UNMERGE_VALUES C
    // A1 = BinOp<Ty/N> B1, C1
984     // ...
985     // AN = BinOp<Ty/N> BN, CN
986     // A = G_MERGE_VALUES A1, ..., AN
987     return narrowScalarBasic(MI, TypeIdx, NarrowTy);
988   }
989   case TargetOpcode::G_SHL:
990   case TargetOpcode::G_LSHR:
991   case TargetOpcode::G_ASHR:
992     return narrowScalarShift(MI, TypeIdx, NarrowTy);
993   case TargetOpcode::G_CTLZ:
994   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
995   case TargetOpcode::G_CTTZ:
996   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
997   case TargetOpcode::G_CTPOP:
998     if (TypeIdx == 1)
999       switch (MI.getOpcode()) {
1000       case TargetOpcode::G_CTLZ:
1001       case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1002         return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1003       case TargetOpcode::G_CTTZ:
1004       case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1005         return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1006       case TargetOpcode::G_CTPOP:
1007         return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1008       default:
1009         return UnableToLegalize;
1010       }
1011 
1012     Observer.changingInstr(MI);
1013     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1014     Observer.changedInstr(MI);
1015     return Legalized;
1016   case TargetOpcode::G_INTTOPTR:
1017     if (TypeIdx != 1)
1018       return UnableToLegalize;
1019 
1020     Observer.changingInstr(MI);
1021     narrowScalarSrc(MI, NarrowTy, 1);
1022     Observer.changedInstr(MI);
1023     return Legalized;
1024   case TargetOpcode::G_PTRTOINT:
1025     if (TypeIdx != 0)
1026       return UnableToLegalize;
1027 
1028     Observer.changingInstr(MI);
1029     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1030     Observer.changedInstr(MI);
1031     return Legalized;
1032   case TargetOpcode::G_PHI: {
1033     unsigned NumParts = SizeOp0 / NarrowSize;
1034     SmallVector<Register, 2> DstRegs(NumParts);
1035     SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1036     Observer.changingInstr(MI);
1037     for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1038       MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1039       MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
1040       extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1041                    SrcRegs[i / 2]);
1042     }
1043     MachineBasicBlock &MBB = *MI.getParent();
1044     MIRBuilder.setInsertPt(MBB, MI);
1045     for (unsigned i = 0; i < NumParts; ++i) {
1046       DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1047       MachineInstrBuilder MIB =
1048           MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1049       for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1050         MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1051     }
1052     MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1053     MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
1054     Observer.changedInstr(MI);
1055     MI.eraseFromParent();
1056     return Legalized;
1057   }
1058   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1059   case TargetOpcode::G_INSERT_VECTOR_ELT: {
1060     if (TypeIdx != 2)
1061       return UnableToLegalize;
1062 
1063     int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1064     Observer.changingInstr(MI);
1065     narrowScalarSrc(MI, NarrowTy, OpIdx);
1066     Observer.changedInstr(MI);
1067     return Legalized;
1068   }
1069   case TargetOpcode::G_ICMP: {
1070     uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
1071     if (NarrowSize * 2 != SrcSize)
1072       return UnableToLegalize;
1073 
1074     Observer.changingInstr(MI);
1075     Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
1076     Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
1077     MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2));
1078 
1079     Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
1080     Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
1081     MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3));
1082 
1083     CmpInst::Predicate Pred =
1084         static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1085     LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
1086 
1087     if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
1088       MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
1089       MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
1090       MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
1091       MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1092       MIRBuilder.buildICmp(Pred, MI.getOperand(0), Or, Zero);
1093     } else {
1094       MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
1095       MachineInstrBuilder CmpHEQ =
1096           MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
1097       MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
1098           ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
1099       MIRBuilder.buildSelect(MI.getOperand(0), CmpHEQ, CmpLU, CmpH);
1100     }
1101     Observer.changedInstr(MI);
1102     MI.eraseFromParent();
1103     return Legalized;
1104   }
1105   case TargetOpcode::G_SEXT_INREG: {
1106     if (TypeIdx != 0)
1107       return UnableToLegalize;
1108 
1109     int64_t SizeInBits = MI.getOperand(2).getImm();
1110 
    // So long as the new type has more bits than the bits we're extending, we
    // don't need to break it apart.
1113     if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1114       Observer.changingInstr(MI);
1115       // We don't lose any non-extension bits by truncating the src and
1116       // sign-extending the dst.
1117       MachineOperand &MO1 = MI.getOperand(1);
1118       auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1119       MO1.setReg(TruncMIB.getReg(0));
1120 
1121       MachineOperand &MO2 = MI.getOperand(0);
1122       Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1123       MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1124       MIRBuilder.buildSExt(MO2, DstExt);
1125       MO2.setReg(DstExt);
1126       Observer.changedInstr(MI);
1127       return Legalized;
1128     }
1129 
1130     // Break it apart. Components below the extension point are unmodified. The
1131     // component containing the extension point becomes a narrower SEXT_INREG.
1132     // Components above it are ashr'd from the component containing the
1133     // extension point.
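    //
    // Illustrative example (register names made up): a G_SEXT_INREG of 8 bits
    // on s64, narrowed to s32, becomes
    //   %lo:_(s32) = G_SEXT_INREG %src_lo, 8
    //   %hi:_(s32) = G_ASHR %lo, 31
    //   %dst:_(s64) = G_MERGE_VALUES %lo:_(s32), %hi:_(s32)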
1134     if (SizeOp0 % NarrowSize != 0)
1135       return UnableToLegalize;
1136     int NumParts = SizeOp0 / NarrowSize;
1137 
1138     // List the registers where the destination will be scattered.
1139     SmallVector<Register, 2> DstRegs;
1140     // List the registers where the source will be split.
1141     SmallVector<Register, 2> SrcRegs;
1142 
1143     // Create all the temporary registers.
1144     for (int i = 0; i < NumParts; ++i) {
1145       Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1146 
1147       SrcRegs.push_back(SrcReg);
1148     }
1149 
1150     // Explode the big arguments into smaller chunks.
1151     MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
1152 
1153     Register AshrCstReg =
1154         MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1155             .getReg(0);
1156     Register FullExtensionReg = 0;
1157     Register PartialExtensionReg = 0;
1158 
1159     // Do the operation on each small part.
1160     for (int i = 0; i < NumParts; ++i) {
1161       if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
1162         DstRegs.push_back(SrcRegs[i]);
1163       else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
1164         assert(PartialExtensionReg &&
1165                "Expected to visit partial extension before full");
1166         if (FullExtensionReg) {
1167           DstRegs.push_back(FullExtensionReg);
1168           continue;
1169         }
1170         DstRegs.push_back(
1171             MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1172                 .getReg(0));
1173         FullExtensionReg = DstRegs.back();
1174       } else {
1175         DstRegs.push_back(
1176             MIRBuilder
1177                 .buildInstr(
1178                     TargetOpcode::G_SEXT_INREG, {NarrowTy},
1179                     {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1180                 .getReg(0));
1181         PartialExtensionReg = DstRegs.back();
1182       }
1183     }
1184 
1185     // Gather the destination registers into the final destination.
1186     Register DstReg = MI.getOperand(0).getReg();
1187     MIRBuilder.buildMerge(DstReg, DstRegs);
1188     MI.eraseFromParent();
1189     return Legalized;
1190   }
1191   case TargetOpcode::G_BSWAP:
1192   case TargetOpcode::G_BITREVERSE: {
1193     if (SizeOp0 % NarrowSize != 0)
1194       return UnableToLegalize;
1195 
1196     Observer.changingInstr(MI);
1197     SmallVector<Register, 2> SrcRegs, DstRegs;
1198     unsigned NumParts = SizeOp0 / NarrowSize;
1199     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
1200 
1201     for (unsigned i = 0; i < NumParts; ++i) {
1202       auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1203                                            {SrcRegs[NumParts - 1 - i]});
1204       DstRegs.push_back(DstPart.getReg(0));
1205     }
1206 
1207     MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
1208 
1209     Observer.changedInstr(MI);
1210     MI.eraseFromParent();
1211     return Legalized;
1212   }
1213   }
1214 }
1215 
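/// Coerce \p Val to an equivalent scalar: pointers are converted with
/// G_PTRTOINT and vectors are bitcast to a scalar of the same total size.
/// Returns an invalid Register for a pointer in a non-integral address space.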
1216 Register LegalizerHelper::coerceToScalar(Register Val) {
1217   LLT Ty = MRI.getType(Val);
1218   if (Ty.isScalar())
1219     return Val;
1220 
1221   const DataLayout &DL = MIRBuilder.getDataLayout();
1222   LLT NewTy = LLT::scalar(Ty.getSizeInBits());
1223   if (Ty.isPointer()) {
1224     if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
1225       return Register();
1226     return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
1227   }
1228 
1229   Register NewVal = Val;
1230 
1231   assert(Ty.isVector());
1232   LLT EltTy = Ty.getElementType();
1233   if (EltTy.isPointer())
1234     NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
1235   return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
1236 }
1237 
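/// Extend the value used by operand \p OpIdx of \p MI to \p WideTy with
/// \p ExtOpcode and rewrite the operand to use the extended register.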
1238 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
1239                                      unsigned OpIdx, unsigned ExtOpcode) {
1240   MachineOperand &MO = MI.getOperand(OpIdx);
1241   auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
1242   MO.setReg(ExtB.getReg(0));
1243 }
1244 
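/// Truncate the value used by operand \p OpIdx of \p MI to \p NarrowTy and
/// rewrite the operand to use the truncated register.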
1245 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
1246                                       unsigned OpIdx) {
1247   MachineOperand &MO = MI.getOperand(OpIdx);
1248   auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
1249   MO.setReg(ExtB.getReg(0));
1250 }
1251 
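/// Rewrite the def at operand \p OpIdx of \p MI to define a \p WideTy
/// register, and emit \p TruncOpcode after \p MI to produce the original,
/// narrower type.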
1252 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
1253                                      unsigned OpIdx, unsigned TruncOpcode) {
1254   MachineOperand &MO = MI.getOperand(OpIdx);
1255   Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1256   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1257   MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
1258   MO.setReg(DstExt);
1259 }
1260 
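/// Rewrite the def at operand \p OpIdx of \p MI to define a \p NarrowTy
/// register, and emit \p ExtOpcode after \p MI to widen it back to the
/// original type.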
1261 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
1262                                       unsigned OpIdx, unsigned ExtOpcode) {
1263   MachineOperand &MO = MI.getOperand(OpIdx);
1264   Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
1265   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1266   MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
1267   MO.setReg(DstTrunc);
1268 }
1269 
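/// Rewrite the vector def at operand \p OpIdx of \p MI to define a \p WideTy
/// register, and extract the original vector from its low elements after
/// \p MI.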
1270 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
1271                                             unsigned OpIdx) {
1272   MachineOperand &MO = MI.getOperand(OpIdx);
1273   Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1274   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1275   MIRBuilder.buildExtract(MO, DstExt, 0);
1276   MO.setReg(DstExt);
1277 }
1278 
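/// Replace the vector use at operand \p OpIdx of \p MI with a \p MoreTy value
/// whose additional elements are undef.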
1279 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
1280                                             unsigned OpIdx) {
1281   MachineOperand &MO = MI.getOperand(OpIdx);
1282 
1283   LLT OldTy = MRI.getType(MO.getReg());
1284   unsigned OldElts = OldTy.getNumElements();
1285   unsigned NewElts = MoreTy.getNumElements();
1286 
1287   unsigned NumParts = NewElts / OldElts;
1288 
1289   // Use concat_vectors if the result is a multiple of the number of elements.
1290   if (NumParts * OldElts == NewElts) {
1291     SmallVector<Register, 8> Parts;
1292     Parts.push_back(MO.getReg());
1293 
1294     Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
1295     for (unsigned I = 1; I != NumParts; ++I)
1296       Parts.push_back(ImpDef);
1297 
1298     auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
1299     MO.setReg(Concat.getReg(0));
1300     return;
1301   }
1302 
1303   Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
1304   Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
1305   MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
1306   MO.setReg(MoreReg);
1307 }
1308 
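/// Rewrite the use at operand \p OpIdx of \p MI to use a bitcast of the
/// original value to \p CastTy.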
1309 void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1310   MachineOperand &Op = MI.getOperand(OpIdx);
1311   Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
1312 }
1313 
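/// Rewrite the def at operand \p OpIdx of \p MI to define a \p CastTy
/// register, and bitcast it back to the original type after \p MI.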
1314 void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1315   MachineOperand &MO = MI.getOperand(OpIdx);
1316   Register CastDst = MRI.createGenericVirtualRegister(CastTy);
1317   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1318   MIRBuilder.buildBitcast(MO, CastDst);
1319   MO.setReg(CastDst);
1320 }
1321 
1322 LegalizerHelper::LegalizeResult
1323 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
1324                                         LLT WideTy) {
1325   if (TypeIdx != 1)
1326     return UnableToLegalize;
1327 
1328   Register DstReg = MI.getOperand(0).getReg();
1329   LLT DstTy = MRI.getType(DstReg);
1330   if (DstTy.isVector())
1331     return UnableToLegalize;
1332 
1333   Register Src1 = MI.getOperand(1).getReg();
1334   LLT SrcTy = MRI.getType(Src1);
1335   const int DstSize = DstTy.getSizeInBits();
1336   const int SrcSize = SrcTy.getSizeInBits();
1337   const int WideSize = WideTy.getSizeInBits();
1338   const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1339 
1340   unsigned NumOps = MI.getNumOperands();
1341   unsigned NumSrc = MI.getNumOperands() - 1;
1342   unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1343 
1344   if (WideSize >= DstSize) {
1345     // Directly pack the bits in the target type.
1346     Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
1347 
1348     for (unsigned I = 2; I != NumOps; ++I) {
1349       const unsigned Offset = (I - 1) * PartSize;
1350 
1351       Register SrcReg = MI.getOperand(I).getReg();
1352       assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
1353 
1354       auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
1355 
1356       Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
1357         MRI.createGenericVirtualRegister(WideTy);
1358 
1359       auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
1360       auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
1361       MIRBuilder.buildOr(NextResult, ResultReg, Shl);
1362       ResultReg = NextResult;
1363     }
1364 
1365     if (WideSize > DstSize)
1366       MIRBuilder.buildTrunc(DstReg, ResultReg);
1367     else if (DstTy.isPointer())
1368       MIRBuilder.buildIntToPtr(DstReg, ResultReg);
1369 
1370     MI.eraseFromParent();
1371     return Legalized;
1372   }
1373 
1374   // Unmerge the original values to the GCD type, and recombine to the next
1375   // multiple greater than the original type.
1376   //
1377   // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1378   // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1379   // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1380   // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1381   // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1382   // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1383   // %12:_(s12) = G_MERGE_VALUES %10, %11
1384   //
1385   // Padding with undef if necessary:
1386   //
1387   // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1388   // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1389   // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1390   // %7:_(s2) = G_IMPLICIT_DEF
1391   // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1392   // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1393   // %10:_(s12) = G_MERGE_VALUES %8, %9
1394 
1395   const int GCD = greatestCommonDivisor(SrcSize, WideSize);
1396   LLT GCDTy = LLT::scalar(GCD);
1397 
1398   SmallVector<Register, 8> Parts;
1399   SmallVector<Register, 8> NewMergeRegs;
1400   SmallVector<Register, 8> Unmerges;
1401   LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
1402 
1403   // Decompose the original operands if they don't evenly divide.
1404   for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
1405     Register SrcReg = MI.getOperand(I).getReg();
1406     if (GCD == SrcSize) {
1407       Unmerges.push_back(SrcReg);
1408     } else {
1409       auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
1410       for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1411         Unmerges.push_back(Unmerge.getReg(J));
1412     }
1413   }
1414 
1415   // Pad with undef to the next size that is a multiple of the requested size.
1416   if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
1417     Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
1418     for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
1419       Unmerges.push_back(UndefReg);
1420   }
1421 
1422   const int PartsPerGCD = WideSize / GCD;
1423 
1424   // Build merges of each piece.
1425   ArrayRef<Register> Slicer(Unmerges);
1426   for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1427     auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
1428     NewMergeRegs.push_back(Merge.getReg(0));
1429   }
1430 
1431   // A truncate may be necessary if the requested type doesn't evenly divide the
1432   // original result type.
1433   if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1434     MIRBuilder.buildMerge(DstReg, NewMergeRegs);
1435   } else {
1436     auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
1437     MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
1438   }
1439 
1440   MI.eraseFromParent();
1441   return Legalized;
1442 }
1443 
1444 LegalizerHelper::LegalizeResult
1445 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
1446                                           LLT WideTy) {
1447   if (TypeIdx != 0)
1448     return UnableToLegalize;
1449 
1450   int NumDst = MI.getNumOperands() - 1;
1451   Register SrcReg = MI.getOperand(NumDst).getReg();
1452   LLT SrcTy = MRI.getType(SrcReg);
1453   if (SrcTy.isVector())
1454     return UnableToLegalize;
1455 
1456   Register Dst0Reg = MI.getOperand(0).getReg();
1457   LLT DstTy = MRI.getType(Dst0Reg);
1458   if (!DstTy.isScalar())
1459     return UnableToLegalize;
1460 
1461   if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
1462     if (SrcTy.isPointer()) {
1463       const DataLayout &DL = MIRBuilder.getDataLayout();
1464       if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
1465         LLVM_DEBUG(
1466             dbgs() << "Not casting non-integral address space integer\n");
1467         return UnableToLegalize;
1468       }
1469 
1470       SrcTy = LLT::scalar(SrcTy.getSizeInBits());
1471       SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
1472     }
1473 
    // Widen SrcTy to WideTy. This does not affect the result, but since the
    // user requested this size, it is probably better supported by the target
    // than SrcTy and should reduce the total number of legalization artifacts.
1477     if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1478       SrcTy = WideTy;
1479       SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
1480     }
1481 
    // There's no unmerge type to target. Directly extract the bits from the
    // source type.
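    //
    // A minimal sketch (assuming scalar types; register numbers are
    // arbitrary): unmerging an s16 into two s8 pieces with WideTy = s32
    // becomes roughly:
    //
    // %1:_(s8), %2:_(s8) = G_UNMERGE_VALUES %0:_(s16)
    // =>
    // %3:_(s32) = G_ANYEXT %0:_(s16)
    // %1:_(s8) = G_TRUNC %3
    // %4:_(s32) = G_CONSTANT i32 8
    // %5:_(s32) = G_LSHR %3, %4
    // %2:_(s8) = G_TRUNC %5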
1484     unsigned DstSize = DstTy.getSizeInBits();
1485 
1486     MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
1487     for (int I = 1; I != NumDst; ++I) {
1488       auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
1489       auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
1490       MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
1491     }
1492 
1493     MI.eraseFromParent();
1494     return Legalized;
1495   }
1496 
1497   // Extend the source to a wider type.
1498   LLT LCMTy = getLCMType(SrcTy, WideTy);
1499 
1500   Register WideSrc = SrcReg;
1501   if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
1502     // TODO: If this is an integral address space, cast to integer and anyext.
1503     if (SrcTy.isPointer()) {
1504       LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
1505       return UnableToLegalize;
1506     }
1507 
1508     WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
1509   }
1510 
1511   auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
1512 
  // Create a sequence of unmerges to the original results. Since we may have
  // widened the source, we will need to pad the results with dead defs to
  // cover the source register.
1516   // e.g. widen s16 to s32:
1517   // %1:_(s16), %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0:_(s48)
1518   //
1519   // =>
1520   //  %4:_(s64) = G_ANYEXT %0:_(s48)
1521   //  %5:_(s32), %6:_(s32) = G_UNMERGE_VALUES %4 ; Requested unmerge
1522   //  %1:_(s16), %2:_(s16) = G_UNMERGE_VALUES %5 ; unpack to original regs
1523   //  %3:_(s16), dead %7 = G_UNMERGE_VALUES %6 ; original reg + extra dead def
1524 
1525   const int NumUnmerge = Unmerge->getNumOperands() - 1;
1526   const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
1527 
1528   for (int I = 0; I != NumUnmerge; ++I) {
1529     auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
1530 
1531     for (int J = 0; J != PartsPerUnmerge; ++J) {
1532       int Idx = I * PartsPerUnmerge + J;
1533       if (Idx < NumDst)
1534         MIB.addDef(MI.getOperand(Idx).getReg());
1535       else {
1536         // Create dead def for excess components.
1537         MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
1538       }
1539     }
1540 
1541     MIB.addUse(Unmerge.getReg(I));
1542   }
1543 
1544   MI.eraseFromParent();
1545   return Legalized;
1546 }
1547 
1548 LegalizerHelper::LegalizeResult
1549 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
1550                                     LLT WideTy) {
1551   Register DstReg = MI.getOperand(0).getReg();
1552   Register SrcReg = MI.getOperand(1).getReg();
1553   LLT SrcTy = MRI.getType(SrcReg);
1554 
1555   LLT DstTy = MRI.getType(DstReg);
1556   unsigned Offset = MI.getOperand(2).getImm();
1557 
1558   if (TypeIdx == 0) {
1559     if (SrcTy.isVector() || DstTy.isVector())
1560       return UnableToLegalize;
1561 
1562     SrcOp Src(SrcReg);
1563     if (SrcTy.isPointer()) {
1564       // Extracts from pointers can be handled only if they are really just
1565       // simple integers.
1566       const DataLayout &DL = MIRBuilder.getDataLayout();
1567       if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
1568         return UnableToLegalize;
1569 
1570       LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
1571       Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
1572       SrcTy = SrcAsIntTy;
1573     }
1574 
1575     if (DstTy.isPointer())
1576       return UnableToLegalize;
1577 
1578     if (Offset == 0) {
1579       // Avoid a shift in the degenerate case.
1580       MIRBuilder.buildTrunc(DstReg,
1581                             MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
1582       MI.eraseFromParent();
1583       return Legalized;
1584     }
1585 
1586     // Do a shift in the source type.
1587     LLT ShiftTy = SrcTy;
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      Src = MIRBuilder.buildAnyExt(WideTy, Src);
      ShiftTy = WideTy;
    }
1593 
1594     auto LShr = MIRBuilder.buildLShr(
1595       ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
1596     MIRBuilder.buildTrunc(DstReg, LShr);
1597     MI.eraseFromParent();
1598     return Legalized;
1599   }
1600 
1601   if (SrcTy.isScalar()) {
1602     Observer.changingInstr(MI);
1603     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1604     Observer.changedInstr(MI);
1605     return Legalized;
1606   }
1607 
1608   if (!SrcTy.isVector())
1609     return UnableToLegalize;
1610 
1611   if (DstTy != SrcTy.getElementType())
1612     return UnableToLegalize;
1613 
1614   if (Offset % SrcTy.getScalarSizeInBits() != 0)
1615     return UnableToLegalize;
1616 
1617   Observer.changingInstr(MI);
1618   widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1619 
1620   MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
1621                           Offset);
1622   widenScalarDst(MI, WideTy.getScalarType(), 0);
1623   Observer.changedInstr(MI);
1624   return Legalized;
1625 }
1626 
1627 LegalizerHelper::LegalizeResult
1628 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
1629                                    LLT WideTy) {
1630   if (TypeIdx != 0)
1631     return UnableToLegalize;
1632   Observer.changingInstr(MI);
1633   widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1634   widenScalarDst(MI, WideTy);
1635   Observer.changedInstr(MI);
1636   return Legalized;
1637 }
1638 
1639 LegalizerHelper::LegalizeResult
1640 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
1641   MIRBuilder.setInstrAndDebugLoc(MI);
1642 
1643   switch (MI.getOpcode()) {
1644   default:
1645     return UnableToLegalize;
1646   case TargetOpcode::G_EXTRACT:
1647     return widenScalarExtract(MI, TypeIdx, WideTy);
1648   case TargetOpcode::G_INSERT:
1649     return widenScalarInsert(MI, TypeIdx, WideTy);
1650   case TargetOpcode::G_MERGE_VALUES:
1651     return widenScalarMergeValues(MI, TypeIdx, WideTy);
1652   case TargetOpcode::G_UNMERGE_VALUES:
1653     return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
1654   case TargetOpcode::G_UADDO:
1655   case TargetOpcode::G_USUBO: {
1656     if (TypeIdx == 1)
1657       return UnableToLegalize; // TODO
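    //
    // Illustrative sketch (arbitrary sizes): widening an s8 G_UADDO to
    // WideTy = s32 gives roughly:
    //
    // %2:_(s8), %3:_(s1) = G_UADDO %0:_(s8), %1:_(s8)
    // =>
    // %4:_(s32) = G_ZEXT %0
    // %5:_(s32) = G_ZEXT %1
    // %6:_(s32) = G_ADD %4, %5
    // %7:_(s32) = G_CONSTANT i32 255
    // %8:_(s32) = G_AND %6, %7
    // %3:_(s1) = G_ICMP intpred(ne), %6, %8
    // %2:_(s8) = G_TRUNC %6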
1658     auto LHSZext = MIRBuilder.buildZExt(WideTy, MI.getOperand(2));
1659     auto RHSZext = MIRBuilder.buildZExt(WideTy, MI.getOperand(3));
1660     unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
1661                           ? TargetOpcode::G_ADD
1662                           : TargetOpcode::G_SUB;
1663     // Do the arithmetic in the larger type.
1664     auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
1665     LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
1666     APInt Mask =
1667         APInt::getLowBitsSet(WideTy.getSizeInBits(), OrigTy.getSizeInBits());
1668     auto AndOp = MIRBuilder.buildAnd(
1669         WideTy, NewOp, MIRBuilder.buildConstant(WideTy, Mask));
1670     // There is no overflow if the AndOp is the same as NewOp.
1671     MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, AndOp);
1672     // Now trunc the NewOp to the original result.
1673     MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
1674     MI.eraseFromParent();
1675     return Legalized;
1676   }
1677   case TargetOpcode::G_CTTZ:
1678   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1679   case TargetOpcode::G_CTLZ:
1680   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1681   case TargetOpcode::G_CTPOP: {
1682     if (TypeIdx == 0) {
1683       Observer.changingInstr(MI);
1684       widenScalarDst(MI, WideTy, 0);
1685       Observer.changedInstr(MI);
1686       return Legalized;
1687     }
1688 
1689     Register SrcReg = MI.getOperand(1).getReg();
1690 
1691     // First ZEXT the input.
1692     auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
1693     LLT CurTy = MRI.getType(SrcReg);
1694     if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
1695       // The count is the same in the larger type except if the original
1696       // value was zero.  This can be handled by setting the bit just off
1697       // the top of the original type.
1698       auto TopBit =
1699           APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
1700       MIBSrc = MIRBuilder.buildOr(
1701         WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
1702     }
1703 
1704     // Perform the operation at the larger size.
1705     auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
    // This is already the correct result for CTPOP and the CTTZ variants.
1707     if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
1708         MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
      // The correct result is NewOp - (the difference in bit width between
      // WideTy and CurTy).
1710       unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
1711       MIBNewOp = MIRBuilder.buildSub(
1712           WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
1713     }
1714 
1715     MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
1716     MI.eraseFromParent();
1717     return Legalized;
1718   }
1719   case TargetOpcode::G_BSWAP: {
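    // Perform the byte swap in the wider type, then shift the result down.
    //
    // A rough sketch (sizes are only an example): an s16 G_BSWAP widened to
    // WideTy = s32 becomes:
    //
    // %1:_(s16) = G_BSWAP %0:_(s16)
    // =>
    // %2:_(s32) = G_ANYEXT %0:_(s16)
    // %3:_(s32) = G_BSWAP %2
    // %4:_(s32) = G_CONSTANT i32 16
    // %5:_(s32) = G_LSHR %3, %4
    // %1:_(s16) = G_TRUNC %5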
1720     Observer.changingInstr(MI);
1721     Register DstReg = MI.getOperand(0).getReg();
1722 
1723     Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
1724     Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1725     Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
1726     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1727 
1728     MI.getOperand(0).setReg(DstExt);
1729 
1730     MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1731 
1732     LLT Ty = MRI.getType(DstReg);
1733     unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
1734     MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
1735     MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
1736 
1737     MIRBuilder.buildTrunc(DstReg, ShrReg);
1738     Observer.changedInstr(MI);
1739     return Legalized;
1740   }
1741   case TargetOpcode::G_BITREVERSE: {
1742     Observer.changingInstr(MI);
1743 
1744     Register DstReg = MI.getOperand(0).getReg();
1745     LLT Ty = MRI.getType(DstReg);
1746     unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
1747 
1748     Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1749     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1750     MI.getOperand(0).setReg(DstExt);
1751     MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1752 
1753     auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
1754     auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
1755     MIRBuilder.buildTrunc(DstReg, Shift);
1756     Observer.changedInstr(MI);
1757     return Legalized;
1758   }
1759   case TargetOpcode::G_FREEZE:
1760     Observer.changingInstr(MI);
1761     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1762     widenScalarDst(MI, WideTy);
1763     Observer.changedInstr(MI);
1764     return Legalized;
1765 
1766   case TargetOpcode::G_ADD:
1767   case TargetOpcode::G_AND:
1768   case TargetOpcode::G_MUL:
1769   case TargetOpcode::G_OR:
1770   case TargetOpcode::G_XOR:
1771   case TargetOpcode::G_SUB:
    // Perform operation at larger width (any extension is fine here, high bits
    // don't affect the result) and then truncate the result back to the
    // original type.
1775     Observer.changingInstr(MI);
1776     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1777     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1778     widenScalarDst(MI, WideTy);
1779     Observer.changedInstr(MI);
1780     return Legalized;
1781 
1782   case TargetOpcode::G_SHL:
1783     Observer.changingInstr(MI);
1784 
1785     if (TypeIdx == 0) {
1786       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1787       widenScalarDst(MI, WideTy);
1788     } else {
1789       assert(TypeIdx == 1);
1790       // The "number of bits to shift" operand must preserve its value as an
1791       // unsigned integer:
1792       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1793     }
1794 
1795     Observer.changedInstr(MI);
1796     return Legalized;
1797 
1798   case TargetOpcode::G_SDIV:
1799   case TargetOpcode::G_SREM:
1800   case TargetOpcode::G_SMIN:
1801   case TargetOpcode::G_SMAX:
1802     Observer.changingInstr(MI);
1803     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
1804     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1805     widenScalarDst(MI, WideTy);
1806     Observer.changedInstr(MI);
1807     return Legalized;
1808 
1809   case TargetOpcode::G_ASHR:
1810   case TargetOpcode::G_LSHR:
1811     Observer.changingInstr(MI);
1812 
1813     if (TypeIdx == 0) {
1814       unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
1815         TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
1816 
1817       widenScalarSrc(MI, WideTy, 1, CvtOp);
1818       widenScalarDst(MI, WideTy);
1819     } else {
1820       assert(TypeIdx == 1);
1821       // The "number of bits to shift" operand must preserve its value as an
1822       // unsigned integer:
1823       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1824     }
1825 
1826     Observer.changedInstr(MI);
1827     return Legalized;
1828   case TargetOpcode::G_UDIV:
1829   case TargetOpcode::G_UREM:
1830   case TargetOpcode::G_UMIN:
1831   case TargetOpcode::G_UMAX:
1832     Observer.changingInstr(MI);
1833     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1834     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1835     widenScalarDst(MI, WideTy);
1836     Observer.changedInstr(MI);
1837     return Legalized;
1838 
1839   case TargetOpcode::G_SELECT:
1840     Observer.changingInstr(MI);
1841     if (TypeIdx == 0) {
1842       // Perform operation at larger width (any extension is fine here, high
1843       // bits don't affect the result) and then truncate the result back to the
1844       // original type.
1845       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1846       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
1847       widenScalarDst(MI, WideTy);
1848     } else {
1849       bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
1850       // Explicit extension is required here since high bits affect the result.
1851       widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
1852     }
1853     Observer.changedInstr(MI);
1854     return Legalized;
1855 
1856   case TargetOpcode::G_FPTOSI:
1857   case TargetOpcode::G_FPTOUI:
1858     Observer.changingInstr(MI);
1859 
1860     if (TypeIdx == 0)
1861       widenScalarDst(MI, WideTy);
1862     else
1863       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
1864 
1865     Observer.changedInstr(MI);
1866     return Legalized;
1867   case TargetOpcode::G_SITOFP:
1868     if (TypeIdx != 1)
1869       return UnableToLegalize;
1870     Observer.changingInstr(MI);
1871     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
1872     Observer.changedInstr(MI);
1873     return Legalized;
1874 
1875   case TargetOpcode::G_UITOFP:
1876     if (TypeIdx != 1)
1877       return UnableToLegalize;
1878     Observer.changingInstr(MI);
1879     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1880     Observer.changedInstr(MI);
1881     return Legalized;
1882 
1883   case TargetOpcode::G_LOAD:
1884   case TargetOpcode::G_SEXTLOAD:
1885   case TargetOpcode::G_ZEXTLOAD:
1886     Observer.changingInstr(MI);
1887     widenScalarDst(MI, WideTy);
1888     Observer.changedInstr(MI);
1889     return Legalized;
1890 
1891   case TargetOpcode::G_STORE: {
1892     if (TypeIdx != 0)
1893       return UnableToLegalize;
1894 
1895     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1896     if (!isPowerOf2_32(Ty.getSizeInBits()))
1897       return UnableToLegalize;
1898 
1899     Observer.changingInstr(MI);
1900 
1901     unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
1902       TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
1903     widenScalarSrc(MI, WideTy, 0, ExtType);
1904 
1905     Observer.changedInstr(MI);
1906     return Legalized;
1907   }
1908   case TargetOpcode::G_CONSTANT: {
1909     MachineOperand &SrcMO = MI.getOperand(1);
1910     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
1911     unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
1912         MRI.getType(MI.getOperand(0).getReg()));
1913     assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
1914             ExtOpc == TargetOpcode::G_ANYEXT) &&
1915            "Illegal Extend");
1916     const APInt &SrcVal = SrcMO.getCImm()->getValue();
1917     const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
1918                            ? SrcVal.sext(WideTy.getSizeInBits())
1919                            : SrcVal.zext(WideTy.getSizeInBits());
1920     Observer.changingInstr(MI);
1921     SrcMO.setCImm(ConstantInt::get(Ctx, Val));
1922 
1923     widenScalarDst(MI, WideTy);
1924     Observer.changedInstr(MI);
1925     return Legalized;
1926   }
1927   case TargetOpcode::G_FCONSTANT: {
1928     MachineOperand &SrcMO = MI.getOperand(1);
1929     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
1930     APFloat Val = SrcMO.getFPImm()->getValueAPF();
1931     bool LosesInfo;
1932     switch (WideTy.getSizeInBits()) {
1933     case 32:
1934       Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
1935                   &LosesInfo);
1936       break;
1937     case 64:
1938       Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
1939                   &LosesInfo);
1940       break;
1941     default:
1942       return UnableToLegalize;
1943     }
1944 
1945     assert(!LosesInfo && "extend should always be lossless");
1946 
1947     Observer.changingInstr(MI);
1948     SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
1949 
1950     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
1951     Observer.changedInstr(MI);
1952     return Legalized;
1953   }
1954   case TargetOpcode::G_IMPLICIT_DEF: {
1955     Observer.changingInstr(MI);
1956     widenScalarDst(MI, WideTy);
1957     Observer.changedInstr(MI);
1958     return Legalized;
1959   }
1960   case TargetOpcode::G_BRCOND:
1961     Observer.changingInstr(MI);
1962     widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
1963     Observer.changedInstr(MI);
1964     return Legalized;
1965 
1966   case TargetOpcode::G_FCMP:
1967     Observer.changingInstr(MI);
1968     if (TypeIdx == 0)
1969       widenScalarDst(MI, WideTy);
1970     else {
1971       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
1972       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
1973     }
1974     Observer.changedInstr(MI);
1975     return Legalized;
1976 
1977   case TargetOpcode::G_ICMP:
1978     Observer.changingInstr(MI);
1979     if (TypeIdx == 0)
1980       widenScalarDst(MI, WideTy);
1981     else {
1982       unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
1983                                MI.getOperand(1).getPredicate()))
1984                                ? TargetOpcode::G_SEXT
1985                                : TargetOpcode::G_ZEXT;
1986       widenScalarSrc(MI, WideTy, 2, ExtOpcode);
1987       widenScalarSrc(MI, WideTy, 3, ExtOpcode);
1988     }
1989     Observer.changedInstr(MI);
1990     return Legalized;
1991 
1992   case TargetOpcode::G_PTR_ADD:
1993     assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
1994     Observer.changingInstr(MI);
1995     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1996     Observer.changedInstr(MI);
1997     return Legalized;
1998 
1999   case TargetOpcode::G_PHI: {
2000     assert(TypeIdx == 0 && "Expecting only Idx 0");
2001 
2002     Observer.changingInstr(MI);
2003     for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
2004       MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2005       MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
2006       widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
2007     }
2008 
2009     MachineBasicBlock &MBB = *MI.getParent();
2010     MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
2011     widenScalarDst(MI, WideTy);
2012     Observer.changedInstr(MI);
2013     return Legalized;
2014   }
2015   case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
2016     if (TypeIdx == 0) {
2017       Register VecReg = MI.getOperand(1).getReg();
2018       LLT VecTy = MRI.getType(VecReg);
2019       Observer.changingInstr(MI);
2020 
2021       widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
2022                                      WideTy.getSizeInBits()),
2023                      1, TargetOpcode::G_SEXT);
2024 
2025       widenScalarDst(MI, WideTy, 0);
2026       Observer.changedInstr(MI);
2027       return Legalized;
2028     }
2029 
2030     if (TypeIdx != 2)
2031       return UnableToLegalize;
2032     Observer.changingInstr(MI);
2033     // TODO: Probably should be zext
2034     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2035     Observer.changedInstr(MI);
2036     return Legalized;
2037   }
2038   case TargetOpcode::G_INSERT_VECTOR_ELT: {
2039     if (TypeIdx == 1) {
2040       Observer.changingInstr(MI);
2041 
2042       Register VecReg = MI.getOperand(1).getReg();
2043       LLT VecTy = MRI.getType(VecReg);
2044       LLT WideVecTy = LLT::vector(VecTy.getNumElements(), WideTy);
2045 
2046       widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
2047       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2048       widenScalarDst(MI, WideVecTy, 0);
2049       Observer.changedInstr(MI);
2050       return Legalized;
2051     }
2052 
    if (TypeIdx == 2) {
      Observer.changingInstr(MI);
      // TODO: Probably should be zext
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
      Observer.changedInstr(MI);
      return Legalized;
    }

    return UnableToLegalize;
2061   }
2062   case TargetOpcode::G_FADD:
2063   case TargetOpcode::G_FMUL:
2064   case TargetOpcode::G_FSUB:
2065   case TargetOpcode::G_FMA:
2066   case TargetOpcode::G_FMAD:
2067   case TargetOpcode::G_FNEG:
2068   case TargetOpcode::G_FABS:
2069   case TargetOpcode::G_FCANONICALIZE:
2070   case TargetOpcode::G_FMINNUM:
2071   case TargetOpcode::G_FMAXNUM:
2072   case TargetOpcode::G_FMINNUM_IEEE:
2073   case TargetOpcode::G_FMAXNUM_IEEE:
2074   case TargetOpcode::G_FMINIMUM:
2075   case TargetOpcode::G_FMAXIMUM:
2076   case TargetOpcode::G_FDIV:
2077   case TargetOpcode::G_FREM:
2078   case TargetOpcode::G_FCEIL:
2079   case TargetOpcode::G_FFLOOR:
2080   case TargetOpcode::G_FCOS:
2081   case TargetOpcode::G_FSIN:
2082   case TargetOpcode::G_FLOG10:
2083   case TargetOpcode::G_FLOG:
2084   case TargetOpcode::G_FLOG2:
2085   case TargetOpcode::G_FRINT:
2086   case TargetOpcode::G_FNEARBYINT:
2087   case TargetOpcode::G_FSQRT:
2088   case TargetOpcode::G_FEXP:
2089   case TargetOpcode::G_FEXP2:
2090   case TargetOpcode::G_FPOW:
2091   case TargetOpcode::G_INTRINSIC_TRUNC:
2092   case TargetOpcode::G_INTRINSIC_ROUND:
2093     assert(TypeIdx == 0);
2094     Observer.changingInstr(MI);
2095 
2096     for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
2097       widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
2098 
2099     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2100     Observer.changedInstr(MI);
2101     return Legalized;
2102   case TargetOpcode::G_INTTOPTR:
2103     if (TypeIdx != 1)
2104       return UnableToLegalize;
2105 
2106     Observer.changingInstr(MI);
2107     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2108     Observer.changedInstr(MI);
2109     return Legalized;
2110   case TargetOpcode::G_PTRTOINT:
2111     if (TypeIdx != 0)
2112       return UnableToLegalize;
2113 
2114     Observer.changingInstr(MI);
2115     widenScalarDst(MI, WideTy, 0);
2116     Observer.changedInstr(MI);
2117     return Legalized;
2118   case TargetOpcode::G_BUILD_VECTOR: {
2119     Observer.changingInstr(MI);
2120 
2121     const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
2122     for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
2123       widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
2124 
2125     // Avoid changing the result vector type if the source element type was
2126     // requested.
2127     if (TypeIdx == 1) {
2128       auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
2129       MI.setDesc(TII.get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
2130     } else {
2131       widenScalarDst(MI, WideTy, 0);
2132     }
2133 
2134     Observer.changedInstr(MI);
2135     return Legalized;
2136   }
2137   case TargetOpcode::G_SEXT_INREG:
2138     if (TypeIdx != 0)
2139       return UnableToLegalize;
2140 
2141     Observer.changingInstr(MI);
2142     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2143     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
2144     Observer.changedInstr(MI);
2145     return Legalized;
2146   }
2147 }
2148 
2149 static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
2150                              MachineIRBuilder &B, Register Src, LLT Ty) {
2151   auto Unmerge = B.buildUnmerge(Ty, Src);
2152   for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
2153     Pieces.push_back(Unmerge.getReg(I));
2154 }
2155 
2156 LegalizerHelper::LegalizeResult
2157 LegalizerHelper::lowerBitcast(MachineInstr &MI) {
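  // Handle casts between vector and scalar types of the same total size by
  // splitting into element-sized pieces and re-merging them. An illustrative
  // sketch (types are only an example):
  //
  // %1:_(s32) = G_BITCAST %0:_(<4 x s8>)
  // =>
  // %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %0
  // %1:_(s32) = G_MERGE_VALUES %2, %3, %4, %5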
2158   Register Dst = MI.getOperand(0).getReg();
2159   Register Src = MI.getOperand(1).getReg();
2160   LLT DstTy = MRI.getType(Dst);
2161   LLT SrcTy = MRI.getType(Src);
2162 
2163   if (SrcTy.isVector() && !DstTy.isVector()) {
2164     SmallVector<Register, 8> SrcRegs;
2165     getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcTy.getElementType());
2166     MIRBuilder.buildMerge(Dst, SrcRegs);
2167     MI.eraseFromParent();
2168     return Legalized;
2169   }
2170 
2171   if (DstTy.isVector() && !SrcTy.isVector()) {
2172     SmallVector<Register, 8> SrcRegs;
2173     getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
2174     MIRBuilder.buildMerge(Dst, SrcRegs);
2175     MI.eraseFromParent();
2176     return Legalized;
2177   }
2178 
2179   return UnableToLegalize;
2180 }
2181 
2182 LegalizerHelper::LegalizeResult
2183 LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
2184   MIRBuilder.setInstr(MI);
2185 
2186   switch (MI.getOpcode()) {
2187   case TargetOpcode::G_LOAD: {
2188     if (TypeIdx != 0)
2189       return UnableToLegalize;
2190 
2191     Observer.changingInstr(MI);
2192     bitcastDst(MI, CastTy, 0);
2193     Observer.changedInstr(MI);
2194     return Legalized;
2195   }
2196   case TargetOpcode::G_STORE: {
2197     if (TypeIdx != 0)
2198       return UnableToLegalize;
2199 
2200     Observer.changingInstr(MI);
2201     bitcastSrc(MI, CastTy, 0);
2202     Observer.changedInstr(MI);
2203     return Legalized;
2204   }
2205   case TargetOpcode::G_SELECT: {
2206     if (TypeIdx != 0)
2207       return UnableToLegalize;
2208 
2209     if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
2210       LLVM_DEBUG(
2211           dbgs() << "bitcast action not implemented for vector select\n");
2212       return UnableToLegalize;
2213     }
2214 
2215     Observer.changingInstr(MI);
2216     bitcastSrc(MI, CastTy, 2);
2217     bitcastSrc(MI, CastTy, 3);
2218     bitcastDst(MI, CastTy, 0);
2219     Observer.changedInstr(MI);
2220     return Legalized;
2221   }
2222   case TargetOpcode::G_AND:
2223   case TargetOpcode::G_OR:
2224   case TargetOpcode::G_XOR: {
2225     Observer.changingInstr(MI);
2226     bitcastSrc(MI, CastTy, 1);
2227     bitcastSrc(MI, CastTy, 2);
2228     bitcastDst(MI, CastTy, 0);
2229     Observer.changedInstr(MI);
2230     return Legalized;
2231   }
2232   default:
2233     return UnableToLegalize;
2234   }
2235 }
2236 
2237 LegalizerHelper::LegalizeResult
2238 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
2239   using namespace TargetOpcode;
2240   MIRBuilder.setInstrAndDebugLoc(MI);
2241 
2242   switch(MI.getOpcode()) {
2243   default:
2244     return UnableToLegalize;
2245   case TargetOpcode::G_BITCAST:
2246     return lowerBitcast(MI);
2247   case TargetOpcode::G_SREM:
2248   case TargetOpcode::G_UREM: {
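    // Lower the remainder in terms of the corresponding division:
    // rem = x - (x / y) * y.
    //
    // Illustrative sketch for G_UREM (G_SREM is analogous with G_SDIV):
    //
    // %2:_(s32) = G_UREM %0:_(s32), %1:_(s32)
    // =>
    // %3:_(s32) = G_UDIV %0, %1
    // %4:_(s32) = G_MUL %3, %1
    // %2:_(s32) = G_SUB %0, %4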
2249     auto Quot =
2250         MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
2251                               {MI.getOperand(1), MI.getOperand(2)});
2252 
2253     auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
2254     MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
2255     MI.eraseFromParent();
2256     return Legalized;
2257   }
2258   case TargetOpcode::G_SADDO:
2259   case TargetOpcode::G_SSUBO:
2260     return lowerSADDO_SSUBO(MI);
2261   case TargetOpcode::G_SMULO:
2262   case TargetOpcode::G_UMULO: {
2263     // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
2264     // result.
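    //
    // An illustrative sketch for the unsigned case (the signed case compares
    // the high half against the sign bits of the low half instead):
    //
    // %2:_(s32), %3:_(s1) = G_UMULO %0:_(s32), %1:_(s32)
    // =>
    // %2:_(s32) = G_MUL %0, %1
    // %4:_(s32) = G_UMULH %0, %1
    // %5:_(s32) = G_CONSTANT i32 0
    // %3:_(s1) = G_ICMP intpred(ne), %4, %5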
2265     Register Res = MI.getOperand(0).getReg();
2266     Register Overflow = MI.getOperand(1).getReg();
2267     Register LHS = MI.getOperand(2).getReg();
2268     Register RHS = MI.getOperand(3).getReg();
2269 
2270     unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
2271                           ? TargetOpcode::G_SMULH
2272                           : TargetOpcode::G_UMULH;
2273 
2274     Observer.changingInstr(MI);
2275     const auto &TII = MIRBuilder.getTII();
2276     MI.setDesc(TII.get(TargetOpcode::G_MUL));
2277     MI.RemoveOperand(1);
2278     Observer.changedInstr(MI);
2279 
2280     MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2281 
2282     auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
2283     auto Zero = MIRBuilder.buildConstant(Ty, 0);
2284 
2285     // For *signed* multiply, overflow is detected by checking:
2286     // (hi != (lo >> bitwidth-1))
2287     if (Opcode == TargetOpcode::G_SMULH) {
2288       auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
2289       auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
2290       MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
2291     } else {
2292       MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
2293     }
2294     return Legalized;
2295   }
2296   case TargetOpcode::G_FNEG: {
2297     // TODO: Handle vector types once we are able to
2298     // represent them.
2299     if (Ty.isVector())
2300       return UnableToLegalize;
2301     Register Res = MI.getOperand(0).getReg();
2302     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
2303     Type *ZeroTy = getFloatTypeForLLT(Ctx, Ty);
2304     if (!ZeroTy)
2305       return UnableToLegalize;
2306     ConstantFP &ZeroForNegation =
2307         *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
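    //
    // A rough sketch of the result (the constant is the zero value used for
    // FP negation, i.e. negative zero):
    //
    // %1:_(s32) = G_FNEG %0:_(s32)
    // =>
    // %2:_(s32) = G_FCONSTANT float -0.0
    // %1:_(s32) = G_FSUB %2, %0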
2308     auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
2309     Register SubByReg = MI.getOperand(1).getReg();
2310     Register ZeroReg = Zero.getReg(0);
2311     MIRBuilder.buildFSub(Res, ZeroReg, SubByReg, MI.getFlags());
2312     MI.eraseFromParent();
2313     return Legalized;
2314   }
2315   case TargetOpcode::G_FSUB: {
2316     // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
2317     // First, check if G_FNEG is marked as Lower. If so, we may
2318     // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
2319     if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
2320       return UnableToLegalize;
2321     Register Res = MI.getOperand(0).getReg();
2322     Register LHS = MI.getOperand(1).getReg();
2323     Register RHS = MI.getOperand(2).getReg();
2324     Register Neg = MRI.createGenericVirtualRegister(Ty);
2325     MIRBuilder.buildFNeg(Neg, RHS);
2326     MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
2327     MI.eraseFromParent();
2328     return Legalized;
2329   }
2330   case TargetOpcode::G_FMAD:
2331     return lowerFMad(MI);
2332   case TargetOpcode::G_FFLOOR:
2333     return lowerFFloor(MI);
2334   case TargetOpcode::G_INTRINSIC_ROUND:
2335     return lowerIntrinsicRound(MI);
2336   case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
2337     Register OldValRes = MI.getOperand(0).getReg();
2338     Register SuccessRes = MI.getOperand(1).getReg();
2339     Register Addr = MI.getOperand(2).getReg();
2340     Register CmpVal = MI.getOperand(3).getReg();
2341     Register NewVal = MI.getOperand(4).getReg();
2342     MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
2343                                   **MI.memoperands_begin());
2344     MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
2345     MI.eraseFromParent();
2346     return Legalized;
2347   }
2348   case TargetOpcode::G_LOAD:
2349   case TargetOpcode::G_SEXTLOAD:
2350   case TargetOpcode::G_ZEXTLOAD: {
2351     // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
2352     Register DstReg = MI.getOperand(0).getReg();
2353     Register PtrReg = MI.getOperand(1).getReg();
2354     LLT DstTy = MRI.getType(DstReg);
2355     auto &MMO = **MI.memoperands_begin();
2356 
2357     if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
2358       if (MI.getOpcode() == TargetOpcode::G_LOAD) {
2359         // This load needs splitting into power of 2 sized loads.
2360         if (DstTy.isVector())
2361           return UnableToLegalize;
2362         if (isPowerOf2_32(DstTy.getSizeInBits()))
2363           return UnableToLegalize; // Don't know what we're being asked to do.
2364 
2365         // Our strategy here is to generate anyextending loads for the smaller
2366         // types up to next power-2 result type, and then combine the two larger
2367         // result values together, before truncating back down to the non-pow-2
2368         // type.
2369         // E.g. v1 = i24 load =>
2370         // v2 = i32 zextload (2 byte)
2371         // v3 = i32 load (1 byte)
2372         // v4 = i32 shl v3, 16
2373         // v5 = i32 or v4, v2
2374         // v1 = i24 trunc v5
2375         // By doing this we generate the correct truncate which should get
2376         // combined away as an artifact with a matching extend.
2377         uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
2378         uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;
2379 
2380         MachineFunction &MF = MIRBuilder.getMF();
2381         MachineMemOperand *LargeMMO =
2382             MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
2383         MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
2384             &MMO, LargeSplitSize / 8, SmallSplitSize / 8);
2385 
2386         LLT PtrTy = MRI.getType(PtrReg);
2387         unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
2388         LLT AnyExtTy = LLT::scalar(AnyExtSize);
2389         Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
2390         Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
2391         auto LargeLoad = MIRBuilder.buildLoadInstr(
2392             TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO);
2393 
2394         auto OffsetCst = MIRBuilder.buildConstant(
2395             LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
2396         Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
2397         auto SmallPtr =
2398             MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
2399         auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
2400                                               *SmallMMO);
2401 
2402         auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
2403         auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
2404         auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
2405         MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
2406         MI.eraseFromParent();
2407         return Legalized;
2408       }
2409       MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
2410       MI.eraseFromParent();
2411       return Legalized;
2412     }
2413 
2414     if (DstTy.isScalar()) {
2415       Register TmpReg =
2416           MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
2417       MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
2418       switch (MI.getOpcode()) {
2419       default:
2420         llvm_unreachable("Unexpected opcode");
2421       case TargetOpcode::G_LOAD:
2422         MIRBuilder.buildExtOrTrunc(TargetOpcode::G_ANYEXT, DstReg, TmpReg);
2423         break;
2424       case TargetOpcode::G_SEXTLOAD:
2425         MIRBuilder.buildSExt(DstReg, TmpReg);
2426         break;
2427       case TargetOpcode::G_ZEXTLOAD:
2428         MIRBuilder.buildZExt(DstReg, TmpReg);
2429         break;
2430       }
2431       MI.eraseFromParent();
2432       return Legalized;
2433     }
2434 
2435     return UnableToLegalize;
2436   }
2437   case TargetOpcode::G_STORE: {
2438     // Lower a non-power of 2 store into multiple pow-2 stores.
2439     // E.g. split an i24 store into an i16 store + i8 store.
2440     // We do this by first extending the stored value to the next largest power
2441     // of 2 type, and then using truncating stores to store the components.
    // As with G_LOAD, doing this generates an extend that can be
    // artifact-combined away instead of leaving behind extracts.
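    //
    // E.g. for an s24 store with a 64-bit pointer (illustrative; the offset
    // constant width follows the pointer size):
    //
    // G_STORE %0:_(s24), %1:_(p0) :: (store 3)
    // =>
    // %2:_(s32) = G_ANYEXT %0:_(s24)
    // %3:_(s32) = G_CONSTANT i32 16
    // %4:_(s32) = G_LSHR %2, %3
    // %5:_(s64) = G_CONSTANT i64 2
    // %6:_(p0) = G_PTR_ADD %1, %5
    // G_STORE %2:_(s32), %1:_(p0) :: (store 2)
    // G_STORE %4:_(s32), %6:_(p0) :: (store 1)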
2444     Register SrcReg = MI.getOperand(0).getReg();
2445     Register PtrReg = MI.getOperand(1).getReg();
2446     LLT SrcTy = MRI.getType(SrcReg);
2447     MachineMemOperand &MMO = **MI.memoperands_begin();
2448     if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
2449       return UnableToLegalize;
2450     if (SrcTy.isVector())
2451       return UnableToLegalize;
2452     if (isPowerOf2_32(SrcTy.getSizeInBits()))
2453       return UnableToLegalize; // Don't know what we're being asked to do.
2454 
2455     // Extend to the next pow-2.
2456     const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
2457     auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);
2458 
2459     // Obtain the smaller value by shifting away the larger value.
2460     uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
2461     uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
2462     auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
2463     auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
2464 
2465     // Generate the PtrAdd and truncating stores.
2466     LLT PtrTy = MRI.getType(PtrReg);
2467     auto OffsetCst = MIRBuilder.buildConstant(
2468             LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
2469     Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
2470     auto SmallPtr =
2471         MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
2472 
2473     MachineFunction &MF = MIRBuilder.getMF();
2474     MachineMemOperand *LargeMMO =
2475         MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
2476     MachineMemOperand *SmallMMO =
2477         MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
2478     MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
2479     MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
2480     MI.eraseFromParent();
2481     return Legalized;
2482   }
2483   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2484   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2485   case TargetOpcode::G_CTLZ:
2486   case TargetOpcode::G_CTTZ:
2487   case TargetOpcode::G_CTPOP:
2488     return lowerBitCount(MI, TypeIdx, Ty);
2489   case G_UADDO: {
2490     Register Res = MI.getOperand(0).getReg();
2491     Register CarryOut = MI.getOperand(1).getReg();
2492     Register LHS = MI.getOperand(2).getReg();
2493     Register RHS = MI.getOperand(3).getReg();
2494 
2495     MIRBuilder.buildAdd(Res, LHS, RHS);
2496     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
2497 
2498     MI.eraseFromParent();
2499     return Legalized;
2500   }
2501   case G_UADDE: {
2502     Register Res = MI.getOperand(0).getReg();
2503     Register CarryOut = MI.getOperand(1).getReg();
2504     Register LHS = MI.getOperand(2).getReg();
2505     Register RHS = MI.getOperand(3).getReg();
2506     Register CarryIn = MI.getOperand(4).getReg();
2507     LLT Ty = MRI.getType(Res);
2508 
2509     auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
2510     auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
2511     MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
2512     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
2513 
2514     MI.eraseFromParent();
2515     return Legalized;
2516   }
2517   case G_USUBO: {
2518     Register Res = MI.getOperand(0).getReg();
2519     Register BorrowOut = MI.getOperand(1).getReg();
2520     Register LHS = MI.getOperand(2).getReg();
2521     Register RHS = MI.getOperand(3).getReg();
2522 
2523     MIRBuilder.buildSub(Res, LHS, RHS);
2524     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
2525 
2526     MI.eraseFromParent();
2527     return Legalized;
2528   }
2529   case G_USUBE: {
2530     Register Res = MI.getOperand(0).getReg();
2531     Register BorrowOut = MI.getOperand(1).getReg();
2532     Register LHS = MI.getOperand(2).getReg();
2533     Register RHS = MI.getOperand(3).getReg();
2534     Register BorrowIn = MI.getOperand(4).getReg();
2535     const LLT CondTy = MRI.getType(BorrowOut);
2536     const LLT Ty = MRI.getType(Res);
2537 
2538     auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
2539     auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
2540     MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
2541 
2542     auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS);
2543     auto LHS_ULT_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS);
2544     MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
2545 
2546     MI.eraseFromParent();
2547     return Legalized;
2548   }
2549   case G_UITOFP:
2550     return lowerUITOFP(MI, TypeIdx, Ty);
2551   case G_SITOFP:
2552     return lowerSITOFP(MI, TypeIdx, Ty);
2553   case G_FPTOUI:
2554     return lowerFPTOUI(MI, TypeIdx, Ty);
2555   case G_FPTOSI:
2556     return lowerFPTOSI(MI);
2557   case G_FPTRUNC:
2558     return lowerFPTRUNC(MI, TypeIdx, Ty);
2559   case G_SMIN:
2560   case G_SMAX:
2561   case G_UMIN:
2562   case G_UMAX:
2563     return lowerMinMax(MI, TypeIdx, Ty);
2564   case G_FCOPYSIGN:
2565     return lowerFCopySign(MI, TypeIdx, Ty);
2566   case G_FMINNUM:
2567   case G_FMAXNUM:
2568     return lowerFMinNumMaxNum(MI);
2569   case G_MERGE_VALUES:
2570     return lowerMergeValues(MI);
2571   case G_UNMERGE_VALUES:
2572     return lowerUnmergeValues(MI);
2573   case TargetOpcode::G_SEXT_INREG: {
2574     assert(MI.getOperand(2).isImm() && "Expected immediate");
2575     int64_t SizeInBits = MI.getOperand(2).getImm();
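    //
    // Lower this as a pair of shifts, e.g. (illustrative) sign-extending the
    // low 8 bits within an s32:
    //
    // %1:_(s32) = G_SEXT_INREG %0:_(s32), 8
    // =>
    // %2:_(s32) = G_CONSTANT i32 24
    // %3:_(s32) = G_SHL %0, %2
    // %1:_(s32) = G_ASHR %3, %2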
2576 
2577     Register DstReg = MI.getOperand(0).getReg();
2578     Register SrcReg = MI.getOperand(1).getReg();
2579     LLT DstTy = MRI.getType(DstReg);
2580     Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
2581 
2582     auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
2583     MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
2584     MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
2585     MI.eraseFromParent();
2586     return Legalized;
2587   }
2588   case G_SHUFFLE_VECTOR:
2589     return lowerShuffleVector(MI);
2590   case G_DYN_STACKALLOC:
2591     return lowerDynStackAlloc(MI);
2592   case G_EXTRACT:
2593     return lowerExtract(MI);
2594   case G_INSERT:
2595     return lowerInsert(MI);
2596   case G_BSWAP:
2597     return lowerBswap(MI);
2598   case G_BITREVERSE:
2599     return lowerBitreverse(MI);
2600   case G_READ_REGISTER:
2601   case G_WRITE_REGISTER:
2602     return lowerReadWriteRegister(MI);
2603   }
2604 }
2605 
2606 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
2607     MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
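  // Break an implicit def of a wide vector into implicit defs of the narrow
  // type, e.g. (an illustrative sketch):
  //
  // %0:_(<4 x s32>) = G_IMPLICIT_DEF with NarrowTy = <2 x s32>
  // =>
  // %1:_(<2 x s32>) = G_IMPLICIT_DEF
  // %2:_(<2 x s32>) = G_IMPLICIT_DEF
  // %0:_(<4 x s32>) = G_CONCAT_VECTORS %1, %2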
2608   SmallVector<Register, 2> DstRegs;
2609 
2610   unsigned NarrowSize = NarrowTy.getSizeInBits();
2611   Register DstReg = MI.getOperand(0).getReg();
2612   unsigned Size = MRI.getType(DstReg).getSizeInBits();
2613   int NumParts = Size / NarrowSize;
2614   // FIXME: Don't know how to handle the situation where the small vectors
2615   // aren't all the same size yet.
2616   if (Size % NarrowSize != 0)
2617     return UnableToLegalize;
2618 
2619   for (int i = 0; i < NumParts; ++i) {
2620     Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
2621     MIRBuilder.buildUndef(TmpReg);
2622     DstRegs.push_back(TmpReg);
2623   }
2624 
2625   if (NarrowTy.isVector())
2626     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2627   else
2628     MIRBuilder.buildBuildVector(DstReg, DstRegs);
2629 
2630   MI.eraseFromParent();
2631   return Legalized;
2632 }
2633 
2634 // Handle splitting vector operations which need to have the same number of
2635 // elements in each type index, but each type index may have a different element
2636 // type.
2637 //
2638 // e.g.  <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
2639 //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2640 //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2641 //
2642 // Also handles some irregular breakdown cases, e.g.
2643 // e.g.  <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
2644 //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2645 //             s64 = G_SHL s64, s32
2646 LegalizerHelper::LegalizeResult
2647 LegalizerHelper::fewerElementsVectorMultiEltType(
2648   MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
2649   if (TypeIdx != 0)
2650     return UnableToLegalize;
2651 
2652   const LLT NarrowTy0 = NarrowTyArg;
2653   const unsigned NewNumElts =
2654       NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;
2655 
2656   const Register DstReg = MI.getOperand(0).getReg();
2657   LLT DstTy = MRI.getType(DstReg);
2658   LLT LeftoverTy0;
2659 
2660   // All of the operands need to have the same number of elements, so if we can
2661   // determine a type breakdown for the result type, we can for all of the
2662   // source types.
2663   int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
2664   if (NumParts < 0)
2665     return UnableToLegalize;
2666 
2667   SmallVector<MachineInstrBuilder, 4> NewInsts;
2668 
2669   SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
2670   SmallVector<Register, 4> PartRegs, LeftoverRegs;
2671 
2672   for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
2673     LLT LeftoverTy;
2674     Register SrcReg = MI.getOperand(I).getReg();
2675     LLT SrcTyI = MRI.getType(SrcReg);
2676     LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
2677     LLT LeftoverTyI;
2678 
2679     // Split this operand into the requested typed registers, and any leftover
2680     // required to reproduce the original type.
2681     if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
2682                       LeftoverRegs))
2683       return UnableToLegalize;
2684 
2685     if (I == 1) {
2686       // For the first operand, create an instruction for each part and setup
2687       // the result.
2688       for (Register PartReg : PartRegs) {
2689         Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2690         NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
2691                                .addDef(PartDstReg)
2692                                .addUse(PartReg));
2693         DstRegs.push_back(PartDstReg);
2694       }
2695 
2696       for (Register LeftoverReg : LeftoverRegs) {
2697         Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
2698         NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
2699                                .addDef(PartDstReg)
2700                                .addUse(LeftoverReg));
2701         LeftoverDstRegs.push_back(PartDstReg);
2702       }
2703     } else {
2704       assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());
2705 
2706       // Add the newly created operand splits to the existing instructions. The
2707       // odd-sized pieces are ordered after the requested NarrowTyArg sized
2708       // pieces.
2709       unsigned InstCount = 0;
2710       for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
2711         NewInsts[InstCount++].addUse(PartRegs[J]);
2712       for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
2713         NewInsts[InstCount++].addUse(LeftoverRegs[J]);
2714     }
2715 
2716     PartRegs.clear();
2717     LeftoverRegs.clear();
2718   }
2719 
2720   // Insert the newly built operations and rebuild the result register.
2721   for (auto &MIB : NewInsts)
2722     MIRBuilder.insertInstr(MIB);
2723 
2724   insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);
2725 
2726   MI.eraseFromParent();
2727   return Legalized;
2728 }
2729 
2730 LegalizerHelper::LegalizeResult
2731 LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
2732                                           LLT NarrowTy) {
2733   if (TypeIdx != 0)
2734     return UnableToLegalize;
2735 
2736   Register DstReg = MI.getOperand(0).getReg();
2737   Register SrcReg = MI.getOperand(1).getReg();
2738   LLT DstTy = MRI.getType(DstReg);
2739   LLT SrcTy = MRI.getType(SrcReg);
2740 
2741   LLT NarrowTy0 = NarrowTy;
2742   LLT NarrowTy1;
2743   unsigned NumParts;
2744 
2745   if (NarrowTy.isVector()) {
2746     // Uneven breakdown not handled.
2747     NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
2748     if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
2749       return UnableToLegalize;
2750 
2751     NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits());
2752   } else {
2753     NumParts = DstTy.getNumElements();
2754     NarrowTy1 = SrcTy.getElementType();
2755   }
2756 
2757   SmallVector<Register, 4> SrcRegs, DstRegs;
2758   extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
2759 
2760   for (unsigned I = 0; I < NumParts; ++I) {
2761     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2762     MachineInstr *NewInst =
2763         MIRBuilder.buildInstr(MI.getOpcode(), {DstReg}, {SrcRegs[I]});
2764 
2765     NewInst->setFlags(MI.getFlags());
2766     DstRegs.push_back(DstReg);
2767   }
2768 
2769   if (NarrowTy.isVector())
2770     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2771   else
2772     MIRBuilder.buildBuildVector(DstReg, DstRegs);
2773 
2774   MI.eraseFromParent();
2775   return Legalized;
2776 }
2777 
2778 LegalizerHelper::LegalizeResult
2779 LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
2780                                         LLT NarrowTy) {
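  // Split a vector compare into compares of narrower vectors (or scalars) and
  // reassemble the result. An illustrative sketch for TypeIdx == 1 with
  // NarrowTy = <2 x s32>:
  //
  // %0:_(<4 x s1>) = G_ICMP intpred(ult), %1:_(<4 x s32>), %2:_(<4 x s32>)
  // =>
  // %3:_(<2 x s32>), %4:_(<2 x s32>) = G_UNMERGE_VALUES %1
  // %5:_(<2 x s32>), %6:_(<2 x s32>) = G_UNMERGE_VALUES %2
  // %7:_(<2 x s1>) = G_ICMP intpred(ult), %3, %5
  // %8:_(<2 x s1>) = G_ICMP intpred(ult), %4, %6
  // %0:_(<4 x s1>) = G_CONCAT_VECTORS %7, %8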
2781   Register DstReg = MI.getOperand(0).getReg();
2782   Register Src0Reg = MI.getOperand(2).getReg();
2783   LLT DstTy = MRI.getType(DstReg);
2784   LLT SrcTy = MRI.getType(Src0Reg);
2785 
2786   unsigned NumParts;
2787   LLT NarrowTy0, NarrowTy1;
2788 
2789   if (TypeIdx == 0) {
2790     unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
2791     unsigned OldElts = DstTy.getNumElements();
2792 
2793     NarrowTy0 = NarrowTy;
2794     NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
2795     NarrowTy1 = NarrowTy.isVector() ?
2796       LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
2797       SrcTy.getElementType();
2798 
2799   } else {
2800     unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
2801     unsigned OldElts = SrcTy.getNumElements();
2802 
2803     NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
2804       NarrowTy.getNumElements();
2805     NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
2806                             DstTy.getScalarSizeInBits());
2807     NarrowTy1 = NarrowTy;
2808   }
2809 
2810   // FIXME: Don't know how to handle the situation where the small vectors
2811   // aren't all the same size yet.
2812   if (NarrowTy1.isVector() &&
2813       NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
2814     return UnableToLegalize;
2815 
2816   CmpInst::Predicate Pred
2817     = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
2818 
2819   SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
2820   extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
2821   extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);
2822 
2823   for (unsigned I = 0; I < NumParts; ++I) {
2824     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2825     DstRegs.push_back(DstReg);
2826 
2827     if (MI.getOpcode() == TargetOpcode::G_ICMP)
2828       MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
2829     else {
2830       MachineInstr *NewCmp
2831         = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
2832       NewCmp->setFlags(MI.getFlags());
2833     }
2834   }
2835 
2836   if (NarrowTy1.isVector())
2837     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2838   else
2839     MIRBuilder.buildBuildVector(DstReg, DstRegs);
2840 
2841   MI.eraseFromParent();
2842   return Legalized;
2843 }
2844 
2845 LegalizerHelper::LegalizeResult
2846 LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
2847                                            LLT NarrowTy) {
2848   Register DstReg = MI.getOperand(0).getReg();
2849   Register CondReg = MI.getOperand(1).getReg();
2850 
2851   unsigned NumParts = 0;
2852   LLT NarrowTy0, NarrowTy1;
2853 
2854   LLT DstTy = MRI.getType(DstReg);
2855   LLT CondTy = MRI.getType(CondReg);
2856   unsigned Size = DstTy.getSizeInBits();
2857 
2858   assert(TypeIdx == 0 || CondTy.isVector());
2859 
2860   if (TypeIdx == 0) {
2861     NarrowTy0 = NarrowTy;
2862     NarrowTy1 = CondTy;
2863 
2864     unsigned NarrowSize = NarrowTy0.getSizeInBits();
    // FIXME: Breakdowns where the small vectors aren't all the same size are
    // not handled yet.
2867     if (Size % NarrowSize != 0)
2868       return UnableToLegalize;
2869 
2870     NumParts = Size / NarrowSize;
2871 
2872     // Need to break down the condition type
2873     if (CondTy.isVector()) {
2874       if (CondTy.getNumElements() == NumParts)
2875         NarrowTy1 = CondTy.getElementType();
2876       else
2877         NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
2878                                 CondTy.getScalarSizeInBits());
2879     }
2880   } else {
2881     NumParts = CondTy.getNumElements();
    // TODO: Handle a vector NarrowTy for the condition, including the uneven
    // breakdown case.
    if (NarrowTy.isVector())
      return UnableToLegalize;

    NarrowTy0 = DstTy.getElementType();
    NarrowTy1 = NarrowTy;
2892   }
2893 
2894   SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
2895   if (CondTy.isVector())
2896     extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
2897 
2898   extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
2899   extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
2900 
2901   for (unsigned i = 0; i < NumParts; ++i) {
2902     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2903     MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
2904                            Src1Regs[i], Src2Regs[i]);
2905     DstRegs.push_back(DstReg);
2906   }
2907 
2908   if (NarrowTy0.isVector())
2909     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2910   else
2911     MIRBuilder.buildBuildVector(DstReg, DstRegs);
2912 
2913   MI.eraseFromParent();
2914   return Legalized;
2915 }
2916 
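// Break a vector G_PHI into NarrowTy-sized (plus possibly one leftover-sized)
// phis. The new phis are created up front in the block containing the original
// phi; the incoming values are then split just before the terminator of each
// predecessor block.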
2917 LegalizerHelper::LegalizeResult
2918 LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
2919                                         LLT NarrowTy) {
2920   const Register DstReg = MI.getOperand(0).getReg();
2921   LLT PhiTy = MRI.getType(DstReg);
2922   LLT LeftoverTy;
2923 
2924   // All of the operands need to have the same number of elements, so if we can
2925   // determine a type breakdown for the result type, we can for all of the
2926   // source types.
2927   int NumParts, NumLeftover;
2928   std::tie(NumParts, NumLeftover)
2929     = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
2930   if (NumParts < 0)
2931     return UnableToLegalize;
2932 
2933   SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
2934   SmallVector<MachineInstrBuilder, 4> NewInsts;
2935 
2936   const int TotalNumParts = NumParts + NumLeftover;
2937 
2938   // Insert the new phis in the result block first.
2939   for (int I = 0; I != TotalNumParts; ++I) {
2940     LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
2941     Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
2942     NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
2943                        .addDef(PartDstReg));
2944     if (I < NumParts)
2945       DstRegs.push_back(PartDstReg);
2946     else
2947       LeftoverDstRegs.push_back(PartDstReg);
2948   }
2949 
2950   MachineBasicBlock *MBB = MI.getParent();
2951   MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
2952   insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);
2953 
2954   SmallVector<Register, 4> PartRegs, LeftoverRegs;
2955 
2956   // Insert code to extract the incoming values in each predecessor block.
2957   for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
2958     PartRegs.clear();
2959     LeftoverRegs.clear();
2960 
2961     Register SrcReg = MI.getOperand(I).getReg();
2962     MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2963     MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
2964 
2965     LLT Unused;
2966     if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
2967                       LeftoverRegs))
2968       return UnableToLegalize;
2969 
    // Add the newly created operand splits to the existing instructions. The
    // odd-sized pieces are ordered after the requested NarrowTy sized pieces.
2973     for (int J = 0; J != TotalNumParts; ++J) {
2974       MachineInstrBuilder MIB = NewInsts[J];
2975       MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
2976       MIB.addMBB(&OpMBB);
2977     }
2978   }
2979 
2980   MI.eraseFromParent();
2981   return Legalized;
2982 }
2983 
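// Handle G_UNMERGE_VALUES when the source type is being split (TypeIdx == 1):
// unmerge the source into pieces of the GCD of the source and NarrowTy, then
// unmerge each of those pieces onto the original destination registers.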
2984 LegalizerHelper::LegalizeResult
2985 LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
2986                                                   unsigned TypeIdx,
2987                                                   LLT NarrowTy) {
2988   if (TypeIdx != 1)
2989     return UnableToLegalize;
2990 
2991   const int NumDst = MI.getNumOperands() - 1;
2992   const Register SrcReg = MI.getOperand(NumDst).getReg();
2993   LLT SrcTy = MRI.getType(SrcReg);
2994 
2995   LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2996 
2997   // TODO: Create sequence of extracts.
2998   if (DstTy == NarrowTy)
2999     return UnableToLegalize;
3000 
3001   LLT GCDTy = getGCDType(SrcTy, NarrowTy);
3002   if (DstTy == GCDTy) {
3003     // This would just be a copy of the same unmerge.
3004     // TODO: Create extracts, pad with undef and create intermediate merges.
3005     return UnableToLegalize;
3006   }
3007 
3008   auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
3009   const int NumUnmerge = Unmerge->getNumOperands() - 1;
3010   const int PartsPerUnmerge = NumDst / NumUnmerge;
3011 
3012   for (int I = 0; I != NumUnmerge; ++I) {
3013     auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
3014 
3015     for (int J = 0; J != PartsPerUnmerge; ++J)
3016       MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
3017     MIB.addUse(Unmerge.getReg(I));
3018   }
3019 
3020   MI.eraseFromParent();
3021   return Legalized;
3022 }
3023 
3024 LegalizerHelper::LegalizeResult
3025 LegalizerHelper::fewerElementsVectorBuildVector(MachineInstr &MI,
3026                                                 unsigned TypeIdx,
3027                                                 LLT NarrowTy) {
3028   assert(TypeIdx == 0 && "not a vector type index");
3029   Register DstReg = MI.getOperand(0).getReg();
3030   LLT DstTy = MRI.getType(DstReg);
  LLT EltTy = DstTy.getElementType();

  int DstNumElts = DstTy.getNumElements();
  int NarrowNumElts = NarrowTy.getNumElements();
  int NumConcat = (DstNumElts + NarrowNumElts - 1) / NarrowNumElts;
  LLT WidenedDstTy = LLT::vector(NarrowNumElts * NumConcat, EltTy);

  SmallVector<Register, 8> ConcatOps;
  SmallVector<Register, 8> SubBuildVector;

  Register UndefReg;
  if (WidenedDstTy != DstTy)
    UndefReg = MIRBuilder.buildUndef(EltTy).getReg(0);
3044 
3045   // Create a G_CONCAT_VECTORS of NarrowTy pieces, padding with undef as
3046   // necessary.
3047   //
3048   // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
3049   //   -> <2 x s16>
3050   //
3051   // %4:_(s16) = G_IMPLICIT_DEF
3052   // %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
3053   // %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
3054   // %7:_(<4 x s16>) = G_CONCAT_VECTORS %5, %6
3055   // %3:_(<3 x s16>) = G_EXTRACT %7, 0
3056   for (int I = 0; I != NumConcat; ++I) {
3057     for (int J = 0; J != NarrowNumElts; ++J) {
3058       int SrcIdx = NarrowNumElts * I + J;
3059 
3060       if (SrcIdx < DstNumElts) {
3061         Register SrcReg = MI.getOperand(SrcIdx + 1).getReg();
3062         SubBuildVector.push_back(SrcReg);
3063       } else
3064         SubBuildVector.push_back(UndefReg);
3065     }
3066 
3067     auto BuildVec = MIRBuilder.buildBuildVector(NarrowTy, SubBuildVector);
3068     ConcatOps.push_back(BuildVec.getReg(0));
3069     SubBuildVector.clear();
3070   }
3071 
3072   if (DstTy == WidenedDstTy)
3073     MIRBuilder.buildConcatVectors(DstReg, ConcatOps);
3074   else {
3075     auto Concat = MIRBuilder.buildConcatVectors(WidenedDstTy, ConcatOps);
3076     MIRBuilder.buildExtract(DstReg, Concat, 0);
3077   }
3078 
3079   MI.eraseFromParent();
3080   return Legalized;
3081 }
3082 
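// Break a non-atomic G_LOAD or G_STORE into NarrowTy-sized accesses at
// increasing byte offsets from the base pointer, plus one leftover-sized
// access if the value type doesn't divide evenly. An illustrative breakdown,
// assuming NarrowTy = s64:
//
//   %val:_(s96) = G_LOAD %ptr:_(p0)
//
// becomes an s64 load at byte offset 0 and an s32 load at byte offset 8 whose
// results are recombined into %val.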
3083 LegalizerHelper::LegalizeResult
3084 LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
3085                                       LLT NarrowTy) {
3086   // FIXME: Don't know how to handle secondary types yet.
3087   if (TypeIdx != 0)
3088     return UnableToLegalize;
3089 
3090   MachineMemOperand *MMO = *MI.memoperands_begin();
3091 
3092   // This implementation doesn't work for atomics. Give up instead of doing
3093   // something invalid.
3094   if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
3095       MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
3096     return UnableToLegalize;
3097 
3098   bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
3099   Register ValReg = MI.getOperand(0).getReg();
3100   Register AddrReg = MI.getOperand(1).getReg();
3101   LLT ValTy = MRI.getType(ValReg);
3102 
3103   // FIXME: Do we need a distinct NarrowMemory legalize action?
3104   if (ValTy.getSizeInBits() != 8 * MMO->getSize()) {
3105     LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
3106     return UnableToLegalize;
3107   }
3108 
3109   int NumParts = -1;
3110   int NumLeftover = -1;
3111   LLT LeftoverTy;
3112   SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
3113   if (IsLoad) {
3114     std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
3115   } else {
3116     if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
3117                      NarrowLeftoverRegs)) {
3118       NumParts = NarrowRegs.size();
3119       NumLeftover = NarrowLeftoverRegs.size();
3120     }
3121   }
3122 
3123   if (NumParts == -1)
3124     return UnableToLegalize;
3125 
3126   const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
3127 
3128   unsigned TotalSize = ValTy.getSizeInBits();
3129 
  // Split the load/store into PartTy sized pieces starting at Offset. If this
  // is a load, return the new registers in ValRegs. For a store, each element
  // of ValRegs should already have type PartTy. Returns the next offset that
  // needs to be handled.
3134   auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
3135                              unsigned Offset) -> unsigned {
3136     MachineFunction &MF = MIRBuilder.getMF();
3137     unsigned PartSize = PartTy.getSizeInBits();
3138     for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
3139          Offset += PartSize, ++Idx) {
3140       unsigned ByteSize = PartSize / 8;
3141       unsigned ByteOffset = Offset / 8;
3142       Register NewAddrReg;
3143 
3144       MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
3145 
3146       MachineMemOperand *NewMMO =
3147         MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);
3148 
3149       if (IsLoad) {
3150         Register Dst = MRI.createGenericVirtualRegister(PartTy);
3151         ValRegs.push_back(Dst);
3152         MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
3153       } else {
3154         MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
3155       }
3156     }
3157 
3158     return Offset;
3159   };
3160 
3161   unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);
3162 
3163   // Handle the rest of the register if this isn't an even type breakdown.
3164   if (LeftoverTy.isValid())
3165     splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);
3166 
3167   if (IsLoad) {
3168     insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
3169                 LeftoverTy, NarrowLeftoverRegs);
3170   }
3171 
3172   MI.eraseFromParent();
3173   return Legalized;
3174 }
3175 
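// Legalize an operation with one result and one to three sources by either
// narrowing the scalar type or reducing the number of vector elements,
// depending on whether NarrowTy differs from the destination in scalar type or
// only in element count. The sources are split (padding with G_ANYEXT/undef as
// needed to reach a multiple of NarrowTy), the operation is performed on each
// real piece, and the results, padded back out with undef, are remerged into
// the destination.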
3176 LegalizerHelper::LegalizeResult
3177 LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned int TypeIdx,
3178                                       LLT NarrowTy) {
3179   assert(TypeIdx == 0 && "only one type index expected");
3180 
3181   const unsigned Opc = MI.getOpcode();
3182   const int NumOps = MI.getNumOperands() - 1;
3183   const Register DstReg = MI.getOperand(0).getReg();
3184   const unsigned Flags = MI.getFlags();
3185   const unsigned NarrowSize = NarrowTy.getSizeInBits();
3186   const LLT NarrowScalarTy = LLT::scalar(NarrowSize);
3187 
3188   assert(NumOps <= 3 && "expected instruction with 1 result and 1-3 sources");
3189 
  // First, check whether we are narrowing (changing the element type) or
  // reducing the number of vector elements.
3192   const LLT DstTy = MRI.getType(DstReg);
3193   const bool IsNarrow = NarrowTy.getScalarType() != DstTy.getScalarType();
3194 
3195   SmallVector<Register, 8> ExtractedRegs[3];
3196   SmallVector<Register, 8> Parts;
3197 
3198   unsigned NarrowElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
3199 
3200   // Break down all the sources into NarrowTy pieces we can operate on. This may
3201   // involve creating merges to a wider type, padded with undef.
3202   for (int I = 0; I != NumOps; ++I) {
3203     Register SrcReg = MI.getOperand(I + 1).getReg();
3204     LLT SrcTy = MRI.getType(SrcReg);
3205 
3206     // The type to narrow SrcReg to. For narrowing, this is a smaller scalar.
3207     // For fewerElements, this is a smaller vector with the same element type.
3208     LLT OpNarrowTy;
3209     if (IsNarrow) {
3210       OpNarrowTy = NarrowScalarTy;
3211 
3212       // In case of narrowing, we need to cast vectors to scalars for this to
3213       // work properly
3214       // FIXME: Can we do without the bitcast here if we're narrowing?
3215       if (SrcTy.isVector()) {
3216         SrcTy = LLT::scalar(SrcTy.getSizeInBits());
3217         SrcReg = MIRBuilder.buildBitcast(SrcTy, SrcReg).getReg(0);
3218       }
3219     } else {
3220       OpNarrowTy = LLT::scalarOrVector(NarrowElts, SrcTy.getScalarType());
3221     }
3222 
3223     LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, OpNarrowTy, SrcReg);
3224 
3225     // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand.
3226     buildLCMMergePieces(SrcTy, OpNarrowTy, GCDTy, ExtractedRegs[I],
3227                         TargetOpcode::G_ANYEXT);
3228   }
3229 
3230   SmallVector<Register, 8> ResultRegs;
3231 
3232   // Input operands for each sub-instruction.
3233   SmallVector<SrcOp, 4> InputRegs(NumOps, Register());
3234 
3235   int NumParts = ExtractedRegs[0].size();
3236   const unsigned DstSize = DstTy.getSizeInBits();
3237   const LLT DstScalarTy = LLT::scalar(DstSize);
3238 
3239   // Narrowing needs to use scalar types
3240   LLT DstLCMTy, NarrowDstTy;
3241   if (IsNarrow) {
3242     DstLCMTy = getLCMType(DstScalarTy, NarrowScalarTy);
3243     NarrowDstTy = NarrowScalarTy;
3244   } else {
3245     DstLCMTy = getLCMType(DstTy, NarrowTy);
3246     NarrowDstTy = NarrowTy;
3247   }
3248 
3249   // We widened the source registers to satisfy merge/unmerge size
3250   // constraints. We'll have some extra fully undef parts.
3251   const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize;
3252 
3253   for (int I = 0; I != NumRealParts; ++I) {
3254     // Emit this instruction on each of the split pieces.
3255     for (int J = 0; J != NumOps; ++J)
3256       InputRegs[J] = ExtractedRegs[J][I];
3257 
3258     auto Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy}, InputRegs, Flags);
3259     ResultRegs.push_back(Inst.getReg(0));
3260   }
3261 
3262   // Fill out the widened result with undef instead of creating instructions
3263   // with undef inputs.
3264   int NumUndefParts = NumParts - NumRealParts;
3265   if (NumUndefParts != 0)
3266     ResultRegs.append(NumUndefParts,
3267                       MIRBuilder.buildUndef(NarrowDstTy).getReg(0));
3268 
3269   // Extract the possibly padded result. Use a scratch register if we need to do
3270   // a final bitcast, otherwise use the original result register.
3271   Register MergeDstReg;
3272   if (IsNarrow && DstTy.isVector())
3273     MergeDstReg = MRI.createGenericVirtualRegister(DstScalarTy);
3274   else
3275     MergeDstReg = DstReg;
3276 
3277   buildWidenedRemergeToDst(MergeDstReg, DstLCMTy, ResultRegs);
3278 
3279   // Recast to vector if we narrowed a vector
3280   if (IsNarrow && DstTy.isVector())
3281     MIRBuilder.buildBitcast(DstReg, MergeDstReg);
3282 
3283   MI.eraseFromParent();
3284   return Legalized;
3285 }
3286 
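// Break a vector G_SEXT_INREG into NarrowTy-sized G_SEXT_INREGs on pieces of
// the source and remerge the results into the destination.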
3287 LegalizerHelper::LegalizeResult
3288 LegalizerHelper::fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx,
3289                                               LLT NarrowTy) {
3290   Register DstReg = MI.getOperand(0).getReg();
3291   Register SrcReg = MI.getOperand(1).getReg();
3292   int64_t Imm = MI.getOperand(2).getImm();
3293 
3294   LLT DstTy = MRI.getType(DstReg);
3295 
3296   SmallVector<Register, 8> Parts;
3297   LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
3298   LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts);
3299 
3300   for (Register &R : Parts)
3301     R = MIRBuilder.buildSExtInReg(NarrowTy, R, Imm).getReg(0);
3302 
3303   buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
3304 
3305   MI.eraseFromParent();
3306   return Legalized;
3307 }
3308 
3309 LegalizerHelper::LegalizeResult
3310 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
3311                                      LLT NarrowTy) {
3312   using namespace TargetOpcode;
3313 
3314   MIRBuilder.setInstrAndDebugLoc(MI);
3315   switch (MI.getOpcode()) {
3316   case G_IMPLICIT_DEF:
3317     return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
3318   case G_TRUNC:
3319   case G_AND:
3320   case G_OR:
3321   case G_XOR:
3322   case G_ADD:
3323   case G_SUB:
3324   case G_MUL:
3325   case G_SMULH:
3326   case G_UMULH:
3327   case G_FADD:
3328   case G_FMUL:
3329   case G_FSUB:
3330   case G_FNEG:
3331   case G_FABS:
3332   case G_FCANONICALIZE:
3333   case G_FDIV:
3334   case G_FREM:
3335   case G_FMA:
3336   case G_FMAD:
3337   case G_FPOW:
3338   case G_FEXP:
3339   case G_FEXP2:
3340   case G_FLOG:
3341   case G_FLOG2:
3342   case G_FLOG10:
3343   case G_FNEARBYINT:
3344   case G_FCEIL:
3345   case G_FFLOOR:
3346   case G_FRINT:
3347   case G_INTRINSIC_ROUND:
3348   case G_INTRINSIC_TRUNC:
3349   case G_FCOS:
3350   case G_FSIN:
3351   case G_FSQRT:
3352   case G_BSWAP:
3353   case G_BITREVERSE:
3354   case G_SDIV:
3355   case G_UDIV:
3356   case G_SREM:
3357   case G_UREM:
3358   case G_SMIN:
3359   case G_SMAX:
3360   case G_UMIN:
3361   case G_UMAX:
3362   case G_FMINNUM:
3363   case G_FMAXNUM:
3364   case G_FMINNUM_IEEE:
3365   case G_FMAXNUM_IEEE:
3366   case G_FMINIMUM:
3367   case G_FMAXIMUM:
3368   case G_FSHL:
3369   case G_FSHR:
3370   case G_FREEZE:
3371     return reduceOperationWidth(MI, TypeIdx, NarrowTy);
3372   case G_SHL:
3373   case G_LSHR:
3374   case G_ASHR:
3375   case G_CTLZ:
3376   case G_CTLZ_ZERO_UNDEF:
3377   case G_CTTZ:
3378   case G_CTTZ_ZERO_UNDEF:
3379   case G_CTPOP:
3380   case G_FCOPYSIGN:
3381     return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
3382   case G_ZEXT:
3383   case G_SEXT:
3384   case G_ANYEXT:
3385   case G_FPEXT:
3386   case G_FPTRUNC:
3387   case G_SITOFP:
3388   case G_UITOFP:
3389   case G_FPTOSI:
3390   case G_FPTOUI:
3391   case G_INTTOPTR:
3392   case G_PTRTOINT:
3393   case G_ADDRSPACE_CAST:
3394     return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
3395   case G_ICMP:
3396   case G_FCMP:
3397     return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
3398   case G_SELECT:
3399     return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
3400   case G_PHI:
3401     return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
3402   case G_UNMERGE_VALUES:
3403     return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
3404   case G_BUILD_VECTOR:
3405     return fewerElementsVectorBuildVector(MI, TypeIdx, NarrowTy);
3406   case G_LOAD:
3407   case G_STORE:
3408     return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
3409   case G_SEXT_INREG:
3410     return fewerElementsVectorSextInReg(MI, TypeIdx, NarrowTy);
3411   default:
3412     return UnableToLegalize;
3413   }
3414 }
3415 
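// Narrow a shift by a known constant amount by splitting the shifted value
// into two half-sized registers and computing the low and high halves of the
// result directly. An illustrative split of a 64-bit logical shift right into
// 32-bit pieces:
//
//   %d:_(s64) = G_LSHR %x:_(s64), %c:_(s32)   ; with %c known to be 40
//
// becomes Lo = InH >> 8 and Hi = 0 on the s32 halves of %x, remerged into %d.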
3416 LegalizerHelper::LegalizeResult
3417 LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
3418                                              const LLT HalfTy, const LLT AmtTy) {
3419 
3420   Register InL = MRI.createGenericVirtualRegister(HalfTy);
3421   Register InH = MRI.createGenericVirtualRegister(HalfTy);
3422   MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
3423 
3424   if (Amt.isNullValue()) {
3425     MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH});
3426     MI.eraseFromParent();
3427     return Legalized;
3428   }
3429 
3430   LLT NVT = HalfTy;
3431   unsigned NVTBits = HalfTy.getSizeInBits();
3432   unsigned VTBits = 2 * NVTBits;
3433 
3434   SrcOp Lo(Register(0)), Hi(Register(0));
3435   if (MI.getOpcode() == TargetOpcode::G_SHL) {
3436     if (Amt.ugt(VTBits)) {
3437       Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
3438     } else if (Amt.ugt(NVTBits)) {
3439       Lo = MIRBuilder.buildConstant(NVT, 0);
3440       Hi = MIRBuilder.buildShl(NVT, InL,
3441                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
3442     } else if (Amt == NVTBits) {
3443       Lo = MIRBuilder.buildConstant(NVT, 0);
3444       Hi = InL;
3445     } else {
3446       Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
3447       auto OrLHS =
3448           MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
3449       auto OrRHS = MIRBuilder.buildLShr(
3450           NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
3451       Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
3452     }
3453   } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
3454     if (Amt.ugt(VTBits)) {
3455       Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
3456     } else if (Amt.ugt(NVTBits)) {
3457       Lo = MIRBuilder.buildLShr(NVT, InH,
3458                                 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
3459       Hi = MIRBuilder.buildConstant(NVT, 0);
3460     } else if (Amt == NVTBits) {
3461       Lo = InH;
3462       Hi = MIRBuilder.buildConstant(NVT, 0);
3463     } else {
3464       auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
3465 
3466       auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
3467       auto OrRHS = MIRBuilder.buildShl(
3468           NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
3469 
3470       Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
3471       Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
3472     }
3473   } else {
3474     if (Amt.ugt(VTBits)) {
3475       Hi = Lo = MIRBuilder.buildAShr(
3476           NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
3477     } else if (Amt.ugt(NVTBits)) {
3478       Lo = MIRBuilder.buildAShr(NVT, InH,
3479                                 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
3480       Hi = MIRBuilder.buildAShr(NVT, InH,
3481                                 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
3482     } else if (Amt == NVTBits) {
3483       Lo = InH;
3484       Hi = MIRBuilder.buildAShr(NVT, InH,
3485                                 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
3486     } else {
3487       auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
3488 
3489       auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
3490       auto OrRHS = MIRBuilder.buildShl(
3491           NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
3492 
3493       Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
3494       Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
3495     }
3496   }
3497 
3498   MIRBuilder.buildMerge(MI.getOperand(0), {Lo, Hi});
3499   MI.eraseFromParent();
3500 
3501   return Legalized;
3502 }
3503 
3504 // TODO: Optimize if constant shift amount.
3505 LegalizerHelper::LegalizeResult
3506 LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
3507                                    LLT RequestedTy) {
3508   if (TypeIdx == 1) {
3509     Observer.changingInstr(MI);
3510     narrowScalarSrc(MI, RequestedTy, 2);
3511     Observer.changedInstr(MI);
3512     return Legalized;
3513   }
3514 
3515   Register DstReg = MI.getOperand(0).getReg();
3516   LLT DstTy = MRI.getType(DstReg);
3517   if (DstTy.isVector())
3518     return UnableToLegalize;
3519 
3520   Register Amt = MI.getOperand(2).getReg();
3521   LLT ShiftAmtTy = MRI.getType(Amt);
3522   const unsigned DstEltSize = DstTy.getScalarSizeInBits();
3523   if (DstEltSize % 2 != 0)
3524     return UnableToLegalize;
3525 
  // Ignore the requested type. We can only go to exactly half the size of the
  // input. If that isn't small enough, the resulting pieces will be further
  // legalized.
3529   const unsigned NewBitSize = DstEltSize / 2;
3530   const LLT HalfTy = LLT::scalar(NewBitSize);
3531   const LLT CondTy = LLT::scalar(1);
3532 
3533   if (const MachineInstr *KShiftAmt =
3534           getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
3535     return narrowScalarShiftByConstant(
3536         MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
3537   }
3538 
3539   // TODO: Expand with known bits.
3540 
3541   // Handle the fully general expansion by an unknown amount.
3542   auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
3543 
3544   Register InL = MRI.createGenericVirtualRegister(HalfTy);
3545   Register InH = MRI.createGenericVirtualRegister(HalfTy);
3546   MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
3547 
3548   auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
3549   auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
3550 
3551   auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
3552   auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
3553   auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
3554 
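  // For each half, compute both the "short" result (Amt < NewBitSize) and the
  // "long" result (Amt >= NewBitSize) and select between them. An extra select
  // on Amt == 0 returns the corresponding input half unchanged, since the
  // sub-shifts by NewBitSize - Amt would otherwise be out of range for the
  // half-sized pieces.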
3555   Register ResultRegs[2];
3556   switch (MI.getOpcode()) {
3557   case TargetOpcode::G_SHL: {
3558     // Short: ShAmt < NewBitSize
3559     auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
3560 
3561     auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
3562     auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
3563     auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
3564 
3565     // Long: ShAmt >= NewBitSize
3566     auto LoL = MIRBuilder.buildConstant(HalfTy, 0);         // Lo part is zero.
3567     auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
3568 
3569     auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
3570     auto Hi = MIRBuilder.buildSelect(
3571         HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
3572 
3573     ResultRegs[0] = Lo.getReg(0);
3574     ResultRegs[1] = Hi.getReg(0);
3575     break;
3576   }
3577   case TargetOpcode::G_LSHR:
3578   case TargetOpcode::G_ASHR: {
3579     // Short: ShAmt < NewBitSize
3580     auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
3581 
3582     auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
3583     auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
3584     auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
3585 
3586     // Long: ShAmt >= NewBitSize
3587     MachineInstrBuilder HiL;
3588     if (MI.getOpcode() == TargetOpcode::G_LSHR) {
3589       HiL = MIRBuilder.buildConstant(HalfTy, 0);            // Hi part is zero.
3590     } else {
3591       auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
3592       HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt);    // Sign of Hi part.
3593     }
3594     auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
3595                                      {InH, AmtExcess});     // Lo from Hi part.
3596 
3597     auto Lo = MIRBuilder.buildSelect(
3598         HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
3599 
3600     auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
3601 
3602     ResultRegs[0] = Lo.getReg(0);
3603     ResultRegs[1] = Hi.getReg(0);
3604     break;
3605   }
3606   default:
3607     llvm_unreachable("not a shift");
3608   }
3609 
3610   MIRBuilder.buildMerge(DstReg, ResultRegs);
3611   MI.eraseFromParent();
3612   return Legalized;
3613 }
3614 
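// Pad a vector G_PHI out to MoreTy. Each incoming value is widened in its
// predecessor block, just before the terminator, and the original value is
// recovered from the widened phi result in the phi's own block.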
3615 LegalizerHelper::LegalizeResult
3616 LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
3617                                        LLT MoreTy) {
3618   assert(TypeIdx == 0 && "Expecting only Idx 0");
3619 
3620   Observer.changingInstr(MI);
3621   for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
3622     MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3623     MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
3624     moreElementsVectorSrc(MI, MoreTy, I);
3625   }
3626 
3627   MachineBasicBlock &MBB = *MI.getParent();
3628   MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
3629   moreElementsVectorDst(MI, MoreTy, 0);
3630   Observer.changedInstr(MI);
3631   return Legalized;
3632 }
3633 
3634 LegalizerHelper::LegalizeResult
3635 LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
3636                                     LLT MoreTy) {
3637   MIRBuilder.setInstrAndDebugLoc(MI);
3638   unsigned Opc = MI.getOpcode();
3639   switch (Opc) {
3640   case TargetOpcode::G_IMPLICIT_DEF:
3641   case TargetOpcode::G_LOAD: {
3642     if (TypeIdx != 0)
3643       return UnableToLegalize;
3644     Observer.changingInstr(MI);
3645     moreElementsVectorDst(MI, MoreTy, 0);
3646     Observer.changedInstr(MI);
3647     return Legalized;
3648   }
3649   case TargetOpcode::G_STORE:
3650     if (TypeIdx != 0)
3651       return UnableToLegalize;
3652     Observer.changingInstr(MI);
3653     moreElementsVectorSrc(MI, MoreTy, 0);
3654     Observer.changedInstr(MI);
3655     return Legalized;
3656   case TargetOpcode::G_AND:
3657   case TargetOpcode::G_OR:
3658   case TargetOpcode::G_XOR:
3659   case TargetOpcode::G_SMIN:
3660   case TargetOpcode::G_SMAX:
3661   case TargetOpcode::G_UMIN:
3662   case TargetOpcode::G_UMAX:
3663   case TargetOpcode::G_FMINNUM:
3664   case TargetOpcode::G_FMAXNUM:
3665   case TargetOpcode::G_FMINNUM_IEEE:
3666   case TargetOpcode::G_FMAXNUM_IEEE:
3667   case TargetOpcode::G_FMINIMUM:
3668   case TargetOpcode::G_FMAXIMUM: {
3669     Observer.changingInstr(MI);
3670     moreElementsVectorSrc(MI, MoreTy, 1);
3671     moreElementsVectorSrc(MI, MoreTy, 2);
3672     moreElementsVectorDst(MI, MoreTy, 0);
3673     Observer.changedInstr(MI);
3674     return Legalized;
3675   }
3676   case TargetOpcode::G_EXTRACT:
3677     if (TypeIdx != 1)
3678       return UnableToLegalize;
3679     Observer.changingInstr(MI);
3680     moreElementsVectorSrc(MI, MoreTy, 1);
3681     Observer.changedInstr(MI);
3682     return Legalized;
3683   case TargetOpcode::G_INSERT:
3684   case TargetOpcode::G_FREEZE:
3685     if (TypeIdx != 0)
3686       return UnableToLegalize;
3687     Observer.changingInstr(MI);
3688     moreElementsVectorSrc(MI, MoreTy, 1);
3689     moreElementsVectorDst(MI, MoreTy, 0);
3690     Observer.changedInstr(MI);
3691     return Legalized;
3692   case TargetOpcode::G_SELECT:
3693     if (TypeIdx != 0)
3694       return UnableToLegalize;
3695     if (MRI.getType(MI.getOperand(1).getReg()).isVector())
3696       return UnableToLegalize;
3697 
3698     Observer.changingInstr(MI);
3699     moreElementsVectorSrc(MI, MoreTy, 2);
3700     moreElementsVectorSrc(MI, MoreTy, 3);
3701     moreElementsVectorDst(MI, MoreTy, 0);
3702     Observer.changedInstr(MI);
3703     return Legalized;
3704   case TargetOpcode::G_UNMERGE_VALUES: {
3705     if (TypeIdx != 1)
3706       return UnableToLegalize;
3707 
3708     LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3709     int NumDst = MI.getNumOperands() - 1;
3710     moreElementsVectorSrc(MI, MoreTy, NumDst);
3711 
3712     auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
3713     for (int I = 0; I != NumDst; ++I)
3714       MIB.addDef(MI.getOperand(I).getReg());
3715 
3716     int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits();
3717     for (int I = NumDst; I != NewNumDst; ++I)
3718       MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
3719 
3720     MIB.addUse(MI.getOperand(NumDst).getReg());
3721     MI.eraseFromParent();
3722     return Legalized;
3723   }
3724   case TargetOpcode::G_PHI:
3725     return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
3726   default:
3727     return UnableToLegalize;
3728   }
3729 }
3730 
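// Generate the sum of the NarrowTy-sized partial products of Src1Regs and
// Src2Regs (both ordered from least to most significant part), writing
// DstRegs.size() parts of the result. This is the schoolbook method: each
// destination part sums the low halves of the relevant partial products, the
// high halves (G_UMULH) of the partial products from the previous part, and
// the carries accumulated so far.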
3731 void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
3732                                         ArrayRef<Register> Src1Regs,
3733                                         ArrayRef<Register> Src2Regs,
3734                                         LLT NarrowTy) {
3735   MachineIRBuilder &B = MIRBuilder;
3736   unsigned SrcParts = Src1Regs.size();
3737   unsigned DstParts = DstRegs.size();
3738 
3739   unsigned DstIdx = 0; // Low bits of the result.
3740   Register FactorSum =
3741       B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
3742   DstRegs[DstIdx] = FactorSum;
3743 
3744   unsigned CarrySumPrevDstIdx;
3745   SmallVector<Register, 4> Factors;
3746 
3747   for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
3748     // Collect low parts of muls for DstIdx.
3749     for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
3750          i <= std::min(DstIdx, SrcParts - 1); ++i) {
3751       MachineInstrBuilder Mul =
3752           B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
3753       Factors.push_back(Mul.getReg(0));
3754     }
3755     // Collect high parts of muls from previous DstIdx.
3756     for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
3757          i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
3758       MachineInstrBuilder Umulh =
3759           B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
3760       Factors.push_back(Umulh.getReg(0));
3761     }
3762     // Add CarrySum from additions calculated for previous DstIdx.
3763     if (DstIdx != 1) {
3764       Factors.push_back(CarrySumPrevDstIdx);
3765     }
3766 
3767     Register CarrySum;
3768     // Add all factors and accumulate all carries into CarrySum.
3769     if (DstIdx != DstParts - 1) {
3770       MachineInstrBuilder Uaddo =
3771           B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
3772       FactorSum = Uaddo.getReg(0);
3773       CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
3774       for (unsigned i = 2; i < Factors.size(); ++i) {
3775         MachineInstrBuilder Uaddo =
3776             B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
3777         FactorSum = Uaddo.getReg(0);
3778         MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
3779         CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
3780       }
3781     } else {
3782       // Since value for the next index is not calculated, neither is CarrySum.
3783       FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
3784       for (unsigned i = 2; i < Factors.size(); ++i)
3785         FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
3786     }
3787 
3788     CarrySumPrevDstIdx = CarrySum;
3789     DstRegs[DstIdx] = FactorSum;
3790     Factors.clear();
3791   }
3792 }
3793 
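// Narrow G_MUL/G_UMULH by splitting both sources into NarrowTy parts and
// multiplying them with multiplyRegisters. For a plain multiply only the low
// DstSize / NarrowSize parts of the product are computed; for G_UMULH twice as
// many parts are computed and the high half is kept.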
3794 LegalizerHelper::LegalizeResult
3795 LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
3796   Register DstReg = MI.getOperand(0).getReg();
3797   Register Src1 = MI.getOperand(1).getReg();
3798   Register Src2 = MI.getOperand(2).getReg();
3799 
3800   LLT Ty = MRI.getType(DstReg);
3801   if (Ty.isVector())
3802     return UnableToLegalize;
3803 
3804   unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
3805   unsigned DstSize = Ty.getSizeInBits();
3806   unsigned NarrowSize = NarrowTy.getSizeInBits();
3807   if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
3808     return UnableToLegalize;
3809 
3810   unsigned NumDstParts = DstSize / NarrowSize;
3811   unsigned NumSrcParts = SrcSize / NarrowSize;
3812   bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
3813   unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);
3814 
3815   SmallVector<Register, 2> Src1Parts, Src2Parts;
3816   SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
3817   extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
3818   extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
3819   multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
3820 
3821   // Take only high half of registers if this is high mul.
3822   ArrayRef<Register> DstRegs(
3823       IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
3824   MIRBuilder.buildMerge(DstReg, DstRegs);
3825   MI.eraseFromParent();
3826   return Legalized;
3827 }
3828 
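// Narrow G_EXTRACT when its source type is split (TypeIdx == 1): split the
// source into NarrowTy pieces, extract the sub-ranges that overlap the
// requested bit range, and merge them into the destination.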
3829 LegalizerHelper::LegalizeResult
3830 LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
3831                                      LLT NarrowTy) {
3832   if (TypeIdx != 1)
3833     return UnableToLegalize;
3834 
3835   uint64_t NarrowSize = NarrowTy.getSizeInBits();
3836 
3837   int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
3838   // FIXME: add support for when SizeOp1 isn't an exact multiple of
3839   // NarrowSize.
3840   if (SizeOp1 % NarrowSize != 0)
3841     return UnableToLegalize;
3842   int NumParts = SizeOp1 / NarrowSize;
3843 
3844   SmallVector<Register, 2> SrcRegs, DstRegs;
3845   SmallVector<uint64_t, 2> Indexes;
3846   extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
3847 
3848   Register OpReg = MI.getOperand(0).getReg();
3849   uint64_t OpStart = MI.getOperand(2).getImm();
3850   uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
3851   for (int i = 0; i < NumParts; ++i) {
3852     unsigned SrcStart = i * NarrowSize;
3853 
3854     if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
3855       // No part of the extract uses this subregister, ignore it.
3856       continue;
3857     } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
3858       // The entire subregister is extracted, forward the value.
3859       DstRegs.push_back(SrcRegs[i]);
3860       continue;
3861     }
3862 
    // Compute the offset into this source piece where the overlapping segment
    // of the extract begins, and the segment's size.
3865     int64_t ExtractOffset;
3866     uint64_t SegSize;
3867     if (OpStart < SrcStart) {
3868       ExtractOffset = 0;
3869       SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
3870     } else {
3871       ExtractOffset = OpStart - SrcStart;
3872       SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
3873     }
3874 
3875     Register SegReg = SrcRegs[i];
3876     if (ExtractOffset != 0 || SegSize != NarrowSize) {
3877       // A genuine extract is needed.
3878       SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
3879       MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
3880     }
3881 
3882     DstRegs.push_back(SegReg);
3883   }
3884 
3885   Register DstReg = MI.getOperand(0).getReg();
3886   if (MRI.getType(DstReg).isVector())
3887     MIRBuilder.buildBuildVector(DstReg, DstRegs);
3888   else if (DstRegs.size() > 1)
3889     MIRBuilder.buildMerge(DstReg, DstRegs);
3890   else
3891     MIRBuilder.buildCopy(DstReg, DstRegs[0]);
3892   MI.eraseFromParent();
3893   return Legalized;
3894 }
3895 
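// Narrow G_INSERT by splitting the full-width source into NarrowTy pieces,
// re-inserting the overlapping segment of the inserted value into each
// affected piece, and merging the pieces back into the destination.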
3896 LegalizerHelper::LegalizeResult
3897 LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
3898                                     LLT NarrowTy) {
3899   // FIXME: Don't know how to handle secondary types yet.
3900   if (TypeIdx != 0)
3901     return UnableToLegalize;
3902 
3903   uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
3904   uint64_t NarrowSize = NarrowTy.getSizeInBits();
3905 
3906   // FIXME: add support for when SizeOp0 isn't an exact multiple of
3907   // NarrowSize.
3908   if (SizeOp0 % NarrowSize != 0)
3909     return UnableToLegalize;
3910 
3911   int NumParts = SizeOp0 / NarrowSize;
3912 
3913   SmallVector<Register, 2> SrcRegs, DstRegs;
3914   SmallVector<uint64_t, 2> Indexes;
3915   extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
3916 
3917   Register OpReg = MI.getOperand(2).getReg();
3918   uint64_t OpStart = MI.getOperand(3).getImm();
3919   uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
3920   for (int i = 0; i < NumParts; ++i) {
3921     unsigned DstStart = i * NarrowSize;
3922 
3923     if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
3924       // No part of the insert affects this subregister, forward the original.
3925       DstRegs.push_back(SrcRegs[i]);
3926       continue;
3927     } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
3928       // The entire subregister is defined by this insert, forward the new
3929       // value.
3930       DstRegs.push_back(OpReg);
3931       continue;
3932     }
3933 
    // Compute where the overlapping segment starts within OpReg
    // (ExtractOffset), where it lands within this destination piece
    // (InsertOffset), and how many bits it covers (SegSize).
3936     int64_t ExtractOffset, InsertOffset;
3937     uint64_t SegSize;
3938     if (OpStart < DstStart) {
3939       InsertOffset = 0;
3940       ExtractOffset = DstStart - OpStart;
3941       SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
3942     } else {
3943       InsertOffset = OpStart - DstStart;
3944       ExtractOffset = 0;
3945       SegSize =
3946         std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
3947     }
3948 
3949     Register SegReg = OpReg;
3950     if (ExtractOffset != 0 || SegSize != OpSize) {
3951       // A genuine extract is needed.
3952       SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
3953       MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
3954     }
3955 
3956     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
3957     MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
3958     DstRegs.push_back(DstReg);
3959   }
3960 
3961   assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
3962   Register DstReg = MI.getOperand(0).getReg();
  if (MRI.getType(DstReg).isVector())
3964     MIRBuilder.buildBuildVector(DstReg, DstRegs);
3965   else
3966     MIRBuilder.buildMerge(DstReg, DstRegs);
3967   MI.eraseFromParent();
3968   return Legalized;
3969 }
3970 
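// Narrow a simple two-source, one-result operation (such as G_AND) by applying
// it to NarrowTy-sized (plus possibly leftover-sized) pieces of both sources
// and reassembling the destination from the partial results.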
3971 LegalizerHelper::LegalizeResult
3972 LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
3973                                    LLT NarrowTy) {
3974   Register DstReg = MI.getOperand(0).getReg();
3975   LLT DstTy = MRI.getType(DstReg);
3976 
3977   assert(MI.getNumOperands() == 3 && TypeIdx == 0);
3978 
3979   SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
3980   SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
3981   SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
3982   LLT LeftoverTy;
3983   if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
3984                     Src0Regs, Src0LeftoverRegs))
3985     return UnableToLegalize;
3986 
3987   LLT Unused;
3988   if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
3989                     Src1Regs, Src1LeftoverRegs))
3990     llvm_unreachable("inconsistent extractParts result");
3991 
3992   for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
3993     auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
3994                                         {Src0Regs[I], Src1Regs[I]});
3995     DstRegs.push_back(Inst.getReg(0));
3996   }
3997 
3998   for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
3999     auto Inst = MIRBuilder.buildInstr(
4000       MI.getOpcode(),
4001       {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
4002     DstLeftoverRegs.push_back(Inst.getReg(0));
4003   }
4004 
4005   insertParts(DstReg, DstTy, NarrowTy, DstRegs,
4006               LeftoverTy, DstLeftoverRegs);
4007 
4008   MI.eraseFromParent();
4009   return Legalized;
4010 }
4011 
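// Narrow an extension: the low NarrowTy-sized parts are taken from the source
// and the remaining high parts are filled in according to the extension
// opcode, then everything is remerged into the destination.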
4012 LegalizerHelper::LegalizeResult
4013 LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
4014                                  LLT NarrowTy) {
4015   if (TypeIdx != 0)
4016     return UnableToLegalize;
4017 
4018   Register DstReg = MI.getOperand(0).getReg();
4019   Register SrcReg = MI.getOperand(1).getReg();
4020 
4021   LLT DstTy = MRI.getType(DstReg);
4022   if (DstTy.isVector())
4023     return UnableToLegalize;
4024 
4025   SmallVector<Register, 8> Parts;
4026   LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
4027   LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
4028   buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
4029 
4030   MI.eraseFromParent();
4031   return Legalized;
4032 }
4033 
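// Narrow a G_SELECT with a scalar condition by selecting between the
// corresponding NarrowTy-sized (and leftover-sized) pieces of the two source
// values, reusing the original condition for every piece.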
4034 LegalizerHelper::LegalizeResult
4035 LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
4036                                     LLT NarrowTy) {
4037   if (TypeIdx != 0)
4038     return UnableToLegalize;
4039 
4040   Register CondReg = MI.getOperand(1).getReg();
4041   LLT CondTy = MRI.getType(CondReg);
4042   if (CondTy.isVector()) // TODO: Handle vselect
4043     return UnableToLegalize;
4044 
4045   Register DstReg = MI.getOperand(0).getReg();
4046   LLT DstTy = MRI.getType(DstReg);
4047 
4048   SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
4049   SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
4050   SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
4051   LLT LeftoverTy;
4052   if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
4053                     Src1Regs, Src1LeftoverRegs))
4054     return UnableToLegalize;
4055 
4056   LLT Unused;
4057   if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
4058                     Src2Regs, Src2LeftoverRegs))
4059     llvm_unreachable("inconsistent extractParts result");
4060 
4061   for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
4062     auto Select = MIRBuilder.buildSelect(NarrowTy,
4063                                          CondReg, Src1Regs[I], Src2Regs[I]);
4064     DstRegs.push_back(Select.getReg(0));
4065   }
4066 
4067   for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
4068     auto Select = MIRBuilder.buildSelect(
4069       LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
4070     DstLeftoverRegs.push_back(Select.getReg(0));
4071   }
4072 
4073   insertParts(DstReg, DstTy, NarrowTy, DstRegs,
4074               LeftoverTy, DstLeftoverRegs);
4075 
4076   MI.eraseFromParent();
4077   return Legalized;
4078 }
4079 
4080 LegalizerHelper::LegalizeResult
4081 LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
4082                                   LLT NarrowTy) {
4083   if (TypeIdx != 1)
4084     return UnableToLegalize;
4085 
4086   Register DstReg = MI.getOperand(0).getReg();
4087   Register SrcReg = MI.getOperand(1).getReg();
4088   LLT DstTy = MRI.getType(DstReg);
4089   LLT SrcTy = MRI.getType(SrcReg);
4090   unsigned NarrowSize = NarrowTy.getSizeInBits();
4091 
4092   if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
4093     const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
4094 
4095     MachineIRBuilder &B = MIRBuilder;
4096     auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
4097     // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
4098     auto C_0 = B.buildConstant(NarrowTy, 0);
4099     auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
4100                                 UnmergeSrc.getReg(1), C_0);
4101     auto LoCTLZ = IsUndef ?
4102       B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
4103       B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
4104     auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
4105     auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
4106     auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
4107     B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
4108 
4109     MI.eraseFromParent();
4110     return Legalized;
4111   }
4112 
4113   return UnableToLegalize;
4114 }
4115 
4116 LegalizerHelper::LegalizeResult
4117 LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
4118                                   LLT NarrowTy) {
4119   if (TypeIdx != 1)
4120     return UnableToLegalize;
4121 
4122   Register DstReg = MI.getOperand(0).getReg();
4123   Register SrcReg = MI.getOperand(1).getReg();
4124   LLT DstTy = MRI.getType(DstReg);
4125   LLT SrcTy = MRI.getType(SrcReg);
4126   unsigned NarrowSize = NarrowTy.getSizeInBits();
4127 
4128   if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
4129     const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
4130 
4131     MachineIRBuilder &B = MIRBuilder;
4132     auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
4133     // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
4134     auto C_0 = B.buildConstant(NarrowTy, 0);
4135     auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
4136                                 UnmergeSrc.getReg(0), C_0);
4137     auto HiCTTZ = IsUndef ?
4138       B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
4139       B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
4140     auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
4141     auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
4142     auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
4143     B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
4144 
4145     MI.eraseFromParent();
4146     return Legalized;
4147   }
4148 
4149   return UnableToLegalize;
4150 }
4151 
4152 LegalizerHelper::LegalizeResult
4153 LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
4154                                    LLT NarrowTy) {
4155   if (TypeIdx != 1)
4156     return UnableToLegalize;
4157 
4158   Register DstReg = MI.getOperand(0).getReg();
4159   LLT DstTy = MRI.getType(DstReg);
4160   LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
4161   unsigned NarrowSize = NarrowTy.getSizeInBits();
4162 
4163   if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
4164     auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
4165 
4166     auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
4167     auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
4168     MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
4169 
4170     MI.eraseFromParent();
4171     return Legalized;
4172   }
4173 
4174   return UnableToLegalize;
4175 }
4176 
4177 LegalizerHelper::LegalizeResult
4178 LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
4179   unsigned Opc = MI.getOpcode();
4180   auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
4181   auto isSupported = [this](const LegalityQuery &Q) {
4182     auto QAction = LI.getAction(Q).Action;
4183     return QAction == Legal || QAction == Libcall || QAction == Custom;
4184   };
4185   switch (Opc) {
4186   default:
4187     return UnableToLegalize;
4188   case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
4189     // This trivially expands to CTLZ.
4190     Observer.changingInstr(MI);
4191     MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
4192     Observer.changedInstr(MI);
4193     return Legalized;
4194   }
4195   case TargetOpcode::G_CTLZ: {
4196     Register DstReg = MI.getOperand(0).getReg();
4197     Register SrcReg = MI.getOperand(1).getReg();
4198     LLT DstTy = MRI.getType(DstReg);
4199     LLT SrcTy = MRI.getType(SrcReg);
4200     unsigned Len = SrcTy.getSizeInBits();
4201 
4202     if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
4203       // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
4204       auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
4205       auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
4206       auto ICmp = MIRBuilder.buildICmp(
4207           CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
4208       auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
4209       MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
4210       MI.eraseFromParent();
4211       return Legalized;
4212     }
    // For now, we do this:
    // NewLen = NextPowerOf2(Len);
    // x = x | (x >> 1);
    // x = x | (x >> 2);
    // ...
    // x = x | (x >> 16);
    // x = x | (x >> 32); // for 64-bit input
    // ... up to a shift of NewLen/2
    // return Len - popcount(x);
4222     //
4223     // Ref: "Hacker's Delight" by Henry Warren
4224     Register Op = SrcReg;
4225     unsigned NewLen = PowerOf2Ceil(Len);
4226     for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
4227       auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
4228       auto MIBOp = MIRBuilder.buildOr(
4229           SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
4230       Op = MIBOp.getReg(0);
4231     }
4232     auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
4233     MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
4234                         MIBPop);
4235     MI.eraseFromParent();
4236     return Legalized;
4237   }
4238   case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
4239     // This trivially expands to CTTZ.
4240     Observer.changingInstr(MI);
4241     MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
4242     Observer.changedInstr(MI);
4243     return Legalized;
4244   }
4245   case TargetOpcode::G_CTTZ: {
4246     Register DstReg = MI.getOperand(0).getReg();
4247     Register SrcReg = MI.getOperand(1).getReg();
4248     LLT DstTy = MRI.getType(DstReg);
4249     LLT SrcTy = MRI.getType(SrcReg);
4250 
4251     unsigned Len = SrcTy.getSizeInBits();
4252     if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
4253       // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
4254       // zero.
4255       auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
4256       auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
4257       auto ICmp = MIRBuilder.buildICmp(
4258           CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
4259       auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
4260       MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
4261       MI.eraseFromParent();
4262       return Legalized;
4263     }
    // For now, we use: { return popcount(~x & (x - 1)); }
    // unless the target has ctlz but not ctpop, in which case we use:
    // { return Len - ctlz(~x & (x - 1)); }
4267     // Ref: "Hacker's Delight" by Henry Warren
4268     auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
4269     auto MIBNot = MIRBuilder.buildXor(Ty, SrcReg, MIBCstNeg1);
4270     auto MIBTmp = MIRBuilder.buildAnd(
4271         Ty, MIBNot, MIRBuilder.buildAdd(Ty, SrcReg, MIBCstNeg1));
4272     if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) &&
4273         isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) {
4274       auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
4275       MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
4276                           MIRBuilder.buildCTLZ(Ty, MIBTmp));
4277       MI.eraseFromParent();
4278       return Legalized;
4279     }
4280     MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
4281     MI.getOperand(1).setReg(MIBTmp.getReg(0));
4282     return Legalized;
4283   }
4284   case TargetOpcode::G_CTPOP: {
4285     unsigned Size = Ty.getSizeInBits();
4286     MachineIRBuilder &B = MIRBuilder;
4287 
    // Count set bits in blocks of 2 bits. The default approach would be
    // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
    // but we use the following formula instead:
    // B2Count = val - { (val >> 1) & 0x55555555 }
    // since it gives the same result per 2-bit block with one instruction less.
4293     auto C_1 = B.buildConstant(Ty, 1);
4294     auto B2Set1LoTo1Hi = B.buildLShr(Ty, MI.getOperand(1).getReg(), C_1);
4295     APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
4296     auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
4297     auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
4298     auto B2Count = B.buildSub(Ty, MI.getOperand(1).getReg(), B2Count1Hi);
4299 
    // To get the count in blocks of 4, add the values from adjacent blocks of 2.
    // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
4302     auto C_2 = B.buildConstant(Ty, 2);
4303     auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
4304     APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
4305     auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
4306     auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
4307     auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
4308     auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
4309 
    // For the count in blocks of 8 bits we don't have to mask the high 4 bits
    // before the addition: each nibble of B4Count is at most 4, so the nibble
    // sums (at most 8) still fit in 4 bits without carrying into the next
    // block. After the addition the high 4 bits of each byte hold a partial
    // sum we don't need; clear them to get the 8-bit result.
4314     // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
4315     auto C_4 = B.buildConstant(Ty, 4);
4316     auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
4317     auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
4318     APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
4319     auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
4320     auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
4321 
    assert(Size <= 128 &&
           "Scalar size is too large for CTPOP lower algorithm");
    // 8 bits can hold the CTPOP result of a 128-bit integer or smaller. The
    // multiply by this bitmask places the sum of all the 8-bit block counts
    // in the 8 most significant bits of ResTmp.
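    // e.g. for Size == 32 the mask is 0x01010101, so the multiply places the
    // per-byte counts b0 + b1 + b2 + b3 in the top byte, which the shift
    // below extracts.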
4325     auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
4326     auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
4327 
4328     // Shift count result from 8 high bits to low bits.
4329     auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
4330     B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
4331 
4332     MI.eraseFromParent();
4333     return Legalized;
4334   }
4335   }
4336 }
4337 
4338 // Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
4339 // representation.
4340 LegalizerHelper::LegalizeResult
4341 LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
4342   Register Dst = MI.getOperand(0).getReg();
4343   Register Src = MI.getOperand(1).getReg();
4344   const LLT S64 = LLT::scalar(64);
4345   const LLT S32 = LLT::scalar(32);
4346   const LLT S1 = LLT::scalar(1);
4347 
4348   assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
4349 
4350   // unsigned cul2f(ulong u) {
4351   //   uint lz = clz(u);
4352   //   uint e = (u != 0) ? 127U + 63U - lz : 0;
4353   //   u = (u << lz) & 0x7fffffffffffffffUL;
4354   //   ulong t = u & 0xffffffffffUL;
4355   //   uint v = (e << 23) | (uint)(u >> 40);
4356   //   uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
4357   //   return as_float(v + r);
4358   // }
4359 
4360   auto Zero32 = MIRBuilder.buildConstant(S32, 0);
4361   auto Zero64 = MIRBuilder.buildConstant(S64, 0);
4362 
4363   auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
4364 
4365   auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
4366   auto Sub = MIRBuilder.buildSub(S32, K, LZ);
4367 
4368   auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
4369   auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
4370 
4371   auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
4372   auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
4373 
4374   auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
4375 
4376   auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
4377   auto T = MIRBuilder.buildAnd(S64, U, Mask1);
4378 
4379   auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
4380   auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
4381   auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
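  // The 40 mantissa bits dropped by the truncation (T below) select the
  // round-to-nearest-even increment R: 1 above the halfway point
  // 0x8000000000, V & 1 on an exact tie, and 0 otherwise.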
4382 
4383   auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
4384   auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
4385   auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
4386   auto One = MIRBuilder.buildConstant(S32, 1);
4387 
4388   auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
4389   auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
4390   auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
  MIRBuilder.buildAdd(Dst, V, R);

  MI.eraseFromParent();
  return Legalized;
4394 }
4395 
4396 LegalizerHelper::LegalizeResult
4397 LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
4398   Register Dst = MI.getOperand(0).getReg();
4399   Register Src = MI.getOperand(1).getReg();
4400   LLT DstTy = MRI.getType(Dst);
4401   LLT SrcTy = MRI.getType(Src);
4402 
4403   if (SrcTy == LLT::scalar(1)) {
4404     auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
4405     auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
4406     MIRBuilder.buildSelect(Dst, Src, True, False);
4407     MI.eraseFromParent();
4408     return Legalized;
4409   }
4410 
4411   if (SrcTy != LLT::scalar(64))
4412     return UnableToLegalize;
4413 
4414   if (DstTy == LLT::scalar(32)) {
4415     // TODO: SelectionDAG has several alternative expansions to port which may
    // be more reasonable depending on the available instructions. If a target
4417     // has sitofp, does not have CTLZ, or can efficiently use f64 as an
4418     // intermediate type, this is probably worse.
4419     return lowerU64ToF32BitOps(MI);
4420   }
4421 
4422   return UnableToLegalize;
4423 }
4424 
4425 LegalizerHelper::LegalizeResult
4426 LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
4427   Register Dst = MI.getOperand(0).getReg();
4428   Register Src = MI.getOperand(1).getReg();
4429   LLT DstTy = MRI.getType(Dst);
4430   LLT SrcTy = MRI.getType(Src);
4431 
4432   const LLT S64 = LLT::scalar(64);
4433   const LLT S32 = LLT::scalar(32);
4434   const LLT S1 = LLT::scalar(1);
4435 
4436   if (SrcTy == S1) {
4437     auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
4438     auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
4439     MIRBuilder.buildSelect(Dst, Src, True, False);
4440     MI.eraseFromParent();
4441     return Legalized;
4442   }
4443 
4444   if (SrcTy != S64)
4445     return UnableToLegalize;
4446 
4447   if (DstTy == S32) {
    // float cl2f(long l) {
4449     //   long s = l >> 63;
4450     //   float r = cul2f((l + s) ^ s);
4451     //   return s ? -r : r;
4452     // }
4453     Register L = Src;
4454     auto SignBit = MIRBuilder.buildConstant(S64, 63);
4455     auto S = MIRBuilder.buildAShr(S64, L, SignBit);
4456 
4457     auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
4458     auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
4459     auto R = MIRBuilder.buildUITOFP(S32, Xor);
4460 
4461     auto RNeg = MIRBuilder.buildFNeg(S32, R);
4462     auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
4463                                             MIRBuilder.buildConstant(S64, 0));
    MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
    MI.eraseFromParent();
    return Legalized;
4466   }
4467 
4468   return UnableToLegalize;
4469 }
4470 
4471 LegalizerHelper::LegalizeResult
4472 LegalizerHelper::lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
4473   Register Dst = MI.getOperand(0).getReg();
4474   Register Src = MI.getOperand(1).getReg();
4475   LLT DstTy = MRI.getType(Dst);
4476   LLT SrcTy = MRI.getType(Src);
4477   const LLT S64 = LLT::scalar(64);
4478   const LLT S32 = LLT::scalar(32);
4479 
4480   if (SrcTy != S64 && SrcTy != S32)
4481     return UnableToLegalize;
4482   if (DstTy != S32 && DstTy != S64)
4483     return UnableToLegalize;
4484 
  // FPTOSI gives the same result as FPTOUI for positive signed integers.
  // FPTOUI additionally has to handle fp values that convert to unsigned
  // integers greater than or equal to 2^(DstBits - 1): 2^31 for an s32
  // destination, 2^63 for an s64 destination. For brevity, call this 2^Exp.
4488 
4489   APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
4490   APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
4491                                                 : APFloat::IEEEdouble(),
4492                     APInt::getNullValue(SrcTy.getSizeInBits()));
4493   TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
4494 
4495   MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
4496 
4497   MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
  // For fp values greater than or equal to the threshold (2^Exp), we use
  // FPTOSI on (Value - 2^Exp) and add 2^Exp back by setting the highest bit
  // of the result to 1.
4500   MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
4501   MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
4502   MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
4503   MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
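  // ResLowBits is FPTOSI(Src - 2^Exp), whose sign bit is clear for in-range
  // inputs at or above the threshold, so XOR-ing in 2^Exp is equivalent to
  // adding it back.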
4504 
4505   const LLT S1 = LLT::scalar(1);
4506 
4507   MachineInstrBuilder FCMP =
4508       MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
4509   MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
4510 
4511   MI.eraseFromParent();
4512   return Legalized;
4513 }
4514 
4515 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
4516   Register Dst = MI.getOperand(0).getReg();
4517   Register Src = MI.getOperand(1).getReg();
4518   LLT DstTy = MRI.getType(Dst);
4519   LLT SrcTy = MRI.getType(Src);
4520   const LLT S64 = LLT::scalar(64);
4521   const LLT S32 = LLT::scalar(32);
4522 
4523   // FIXME: Only f32 to i64 conversions are supported.
4524   if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
4525     return UnableToLegalize;
4526 
4527   // Expand f32 -> i64 conversion
4528   // This algorithm comes from compiler-rt's implementation of fixsfdi:
4529   // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c
4530 
4531   unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
4532 
4533   auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
4534   auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
4535 
4536   auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
4537   auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
4538 
4539   auto SignMask = MIRBuilder.buildConstant(SrcTy,
4540                                            APInt::getSignMask(SrcEltBits));
4541   auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
4542   auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
4543   auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
4544   Sign = MIRBuilder.buildSExt(DstTy, Sign);
4545 
4546   auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
4547   auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
4548   auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
4549 
4550   auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
4551   R = MIRBuilder.buildZExt(DstTy, R);
4552 
4553   auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
4554   auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
4555   auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
4556   auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
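  // Align the binary point: when the unbiased exponent exceeds the number of
  // mantissa bits (23) the significand is shifted left by the difference,
  // otherwise it is shifted right.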
4557 
4558   auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
4559   auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
4560 
4561   const LLT S1 = LLT::scalar(1);
4562   auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
4563                                     S1, Exponent, ExponentLoBit);
4564 
4565   R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
4566 
4567   auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
4568   auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
4569 
4570   auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
4571 
4572   auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
4573                                           S1, Exponent, ZeroSrcTy);
4574 
4575   auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
4576   MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
4577 
4578   MI.eraseFromParent();
4579   return Legalized;
4580 }
4581 
4582 // f64 -> f16 conversion using round-to-nearest-even rounding mode.
4583 LegalizerHelper::LegalizeResult
4584 LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
4585   Register Dst = MI.getOperand(0).getReg();
4586   Register Src = MI.getOperand(1).getReg();
4587 
4588   if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
4589     return UnableToLegalize;
4590 
4591   const unsigned ExpMask = 0x7ff;
4592   const unsigned ExpBiasf64 = 1023;
4593   const unsigned ExpBiasf16 = 15;
4594   const LLT S32 = LLT::scalar(32);
4595   const LLT S1 = LLT::scalar(1);
4596 
4597   auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
4598   Register U = Unmerge.getReg(0);
4599   Register UH = Unmerge.getReg(1);
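  // U holds the low 32 bits of the f64 value and UH the high 32 bits (sign,
  // 11-bit exponent and the top 20 mantissa bits).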
4600 
4601   auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
4602 
4603   // Subtract the fp64 exponent bias (1023) to get the real exponent and
4604   // add the f16 bias (15) to get the biased exponent for the f16 format.
4605   E = MIRBuilder.buildAdd(
4606     S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
4607   E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
4608 
4609   auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
4610   M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
4611 
4612   auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
4613                                        MIRBuilder.buildConstant(S32, 0x1ff));
4614   MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
4615 
4616   auto Zero = MIRBuilder.buildConstant(S32, 0);
4617   auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
4618   auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
4619   M = MIRBuilder.buildOr(S32, M, Lo40Set);
4620 
4621   // (M != 0 ? 0x0200 : 0) | 0x7c00;
4622   auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
4623   auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
4624   auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
4625 
4626   auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
4627   auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
4628 
4629   // N = M | (E << 12);
4630   auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
4631   auto N = MIRBuilder.buildOr(S32, M, EShl12);
4632 
4633   // B = clamp(1-E, 0, 13);
4634   auto One = MIRBuilder.buildConstant(S32, 1);
4635   auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
4636   auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
4637   B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
4638 
4639   auto SigSetHigh = MIRBuilder.buildOr(S32, M,
4640                                        MIRBuilder.buildConstant(S32, 0x1000));
4641 
4642   auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
4643   auto D0 = MIRBuilder.buildShl(S32, D, B);
4644 
4645   auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
4646                                              D0, SigSetHigh);
4647   auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
4648   D = MIRBuilder.buildOr(S32, D, D1);
4649 
4650   auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
4651   auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
4652 
4653   auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
4654   V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));
4655 
4656   auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
4657                                        MIRBuilder.buildConstant(S32, 3));
4658   auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
4659 
4660   auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
4661                                        MIRBuilder.buildConstant(S32, 5));
4662   auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
4663 
4664   V1 = MIRBuilder.buildOr(S32, V0, V1);
4665   V = MIRBuilder.buildAdd(S32, V, V1);
4666 
4667   auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,  S1,
4668                                        E, MIRBuilder.buildConstant(S32, 30));
4669   V = MIRBuilder.buildSelect(S32, CmpEGt30,
4670                              MIRBuilder.buildConstant(S32, 0x7c00), V);
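  // After rebiasing, E == 1039 (2047 - 1023 + 15) corresponds to an all-ones
  // f64 exponent, i.e. Inf or NaN, so substitute the Inf/NaN pattern I.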
4671 
4672   auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
4673                                          E, MIRBuilder.buildConstant(S32, 1039));
4674   V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
4675 
4676   // Extract the sign bit.
4677   auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
4678   Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
4679 
4680   // Insert the sign bit
4681   V = MIRBuilder.buildOr(S32, Sign, V);
4682 
4683   MIRBuilder.buildTrunc(Dst, V);
4684   MI.eraseFromParent();
4685   return Legalized;
4686 }
4687 
4688 LegalizerHelper::LegalizeResult
4689 LegalizerHelper::lowerFPTRUNC(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
4690   Register Dst = MI.getOperand(0).getReg();
4691   Register Src = MI.getOperand(1).getReg();
4692 
4693   LLT DstTy = MRI.getType(Dst);
4694   LLT SrcTy = MRI.getType(Src);
4695   const LLT S64 = LLT::scalar(64);
4696   const LLT S16 = LLT::scalar(16);
4697 
4698   if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
4699     return lowerFPTRUNC_F64_TO_F16(MI);
4700 
4701   return UnableToLegalize;
4702 }
4703 
4704 static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
4705   switch (Opc) {
4706   case TargetOpcode::G_SMIN:
4707     return CmpInst::ICMP_SLT;
4708   case TargetOpcode::G_SMAX:
4709     return CmpInst::ICMP_SGT;
4710   case TargetOpcode::G_UMIN:
4711     return CmpInst::ICMP_ULT;
4712   case TargetOpcode::G_UMAX:
4713     return CmpInst::ICMP_UGT;
4714   default:
4715     llvm_unreachable("not in integer min/max");
4716   }
4717 }
4718 
4719 LegalizerHelper::LegalizeResult
4720 LegalizerHelper::lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
4721   Register Dst = MI.getOperand(0).getReg();
4722   Register Src0 = MI.getOperand(1).getReg();
4723   Register Src1 = MI.getOperand(2).getReg();
4724 
4725   const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
4726   LLT CmpType = MRI.getType(Dst).changeElementSize(1);
4727 
4728   auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
4729   MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
4730 
4731   MI.eraseFromParent();
4732   return Legalized;
4733 }
4734 
4735 LegalizerHelper::LegalizeResult
4736 LegalizerHelper::lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
4737   Register Dst = MI.getOperand(0).getReg();
4738   Register Src0 = MI.getOperand(1).getReg();
4739   Register Src1 = MI.getOperand(2).getReg();
4740 
4741   const LLT Src0Ty = MRI.getType(Src0);
4742   const LLT Src1Ty = MRI.getType(Src1);
4743 
4744   const int Src0Size = Src0Ty.getScalarSizeInBits();
4745   const int Src1Size = Src1Ty.getScalarSizeInBits();
4746 
4747   auto SignBitMask = MIRBuilder.buildConstant(
4748     Src0Ty, APInt::getSignMask(Src0Size));
4749 
4750   auto NotSignBitMask = MIRBuilder.buildConstant(
4751     Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
4752 
4753   auto And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask);
4754   MachineInstr *Or;
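  // And0 is Src0 with its sign bit cleared; each branch below extracts the
  // sign bit of Src1 (adjusting for any width difference) and ORs it in.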
4755 
4756   if (Src0Ty == Src1Ty) {
4757     auto And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask);
4758     Or = MIRBuilder.buildOr(Dst, And0, And1);
4759   } else if (Src0Size > Src1Size) {
4760     auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
4761     auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
4762     auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
4763     auto And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask);
4764     Or = MIRBuilder.buildOr(Dst, And0, And1);
4765   } else {
4766     auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
4767     auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
4768     auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
4769     auto And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask);
4770     Or = MIRBuilder.buildOr(Dst, And0, And1);
4771   }
4772 
4773   // Be careful about setting nsz/nnan/ninf on every instruction, since the
4774   // constants are a nan and -0.0, but the final result should preserve
4775   // everything.
4776   if (unsigned Flags = MI.getFlags())
4777     Or->setFlags(Flags);
4778 
4779   MI.eraseFromParent();
4780   return Legalized;
4781 }
4782 
4783 LegalizerHelper::LegalizeResult
4784 LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
4785   unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
4786     TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
4787 
4788   Register Dst = MI.getOperand(0).getReg();
4789   Register Src0 = MI.getOperand(1).getReg();
4790   Register Src1 = MI.getOperand(2).getReg();
4791   LLT Ty = MRI.getType(Dst);
4792 
4793   if (!MI.getFlag(MachineInstr::FmNoNans)) {
4794     // Insert canonicalizes if it's possible we need to quiet to get correct
4795     // sNaN behavior.
4796 
    // Note this must be done here, and not as an optimization combine in the
    // absence of a dedicated quiet-sNaN instruction, as we're using an
    // omni-purpose G_FCANONICALIZE.
4800     if (!isKnownNeverSNaN(Src0, MRI))
4801       Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
4802 
4803     if (!isKnownNeverSNaN(Src1, MRI))
4804       Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
4805   }
4806 
  // Once any sNaNs have been quieted (or are known absent), it's safe to
  // replace this with the IEEE variant.
4809   MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
4810   MI.eraseFromParent();
4811   return Legalized;
4812 }
4813 
4814 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
4815   // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
4816   Register DstReg = MI.getOperand(0).getReg();
4817   LLT Ty = MRI.getType(DstReg);
4818   unsigned Flags = MI.getFlags();
4819 
4820   auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
4821                                   Flags);
4822   MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
4823   MI.eraseFromParent();
4824   return Legalized;
4825 }
4826 
4827 LegalizerHelper::LegalizeResult
4828 LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
4829   Register DstReg = MI.getOperand(0).getReg();
4830   Register X = MI.getOperand(1).getReg();
4831   const unsigned Flags = MI.getFlags();
4832   const LLT Ty = MRI.getType(DstReg);
4833   const LLT CondTy = Ty.changeElementSize(1);
4834 
4835   // round(x) =>
4836   //  t = trunc(x);
4837   //  d = fabs(x - t);
4838   //  o = copysign(1.0f, x);
4839   //  return t + (d >= 0.5 ? o : 0.0);
4840 
4841   auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
4842 
4843   auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
4844   auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
4845   auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
4846   auto One = MIRBuilder.buildFConstant(Ty, 1.0);
4847   auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
4848   auto SignOne = MIRBuilder.buildFCopysign(Ty, One, X);
4849 
4850   auto Cmp = MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half,
4851                                   Flags);
4852   auto Sel = MIRBuilder.buildSelect(Ty, Cmp, SignOne, Zero, Flags);
4853 
4854   MIRBuilder.buildFAdd(DstReg, T, Sel, Flags);
4855 
4856   MI.eraseFromParent();
4857   return Legalized;
4858 }
4859 
4860 LegalizerHelper::LegalizeResult
4861 LegalizerHelper::lowerFFloor(MachineInstr &MI) {
4862   Register DstReg = MI.getOperand(0).getReg();
4863   Register SrcReg = MI.getOperand(1).getReg();
4864   unsigned Flags = MI.getFlags();
4865   LLT Ty = MRI.getType(DstReg);
4866   const LLT CondTy = Ty.changeElementSize(1);
4867 
4868   // result = trunc(src);
4869   // if (src < 0.0 && src != result)
4870   //   result += -1.0.
4871 
4872   auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
4873   auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
4874 
4875   auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
4876                                   SrcReg, Zero, Flags);
4877   auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
4878                                       SrcReg, Trunc, Flags);
4879   auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
4880   auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
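  // G_SITOFP of the i1 condition yields -1.0 when the decrement is needed and
  // 0.0 otherwise, so a single fadd completes the floor.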
4881 
4882   MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
4883   MI.eraseFromParent();
4884   return Legalized;
4885 }
4886 
4887 LegalizerHelper::LegalizeResult
4888 LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
4889   const unsigned NumOps = MI.getNumOperands();
4890   Register DstReg = MI.getOperand(0).getReg();
4891   Register Src0Reg = MI.getOperand(1).getReg();
4892   LLT DstTy = MRI.getType(DstReg);
4893   LLT SrcTy = MRI.getType(Src0Reg);
4894   unsigned PartSize = SrcTy.getSizeInBits();
4895 
4896   LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
4897   Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
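  // Accumulate the remaining parts: zero-extend each one, shift it to its bit
  // offset and OR it into the running result.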
4898 
4899   for (unsigned I = 2; I != NumOps; ++I) {
4900     const unsigned Offset = (I - 1) * PartSize;
4901 
4902     Register SrcReg = MI.getOperand(I).getReg();
4903     auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
4904 
4905     Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
4906       MRI.createGenericVirtualRegister(WideTy);
4907 
4908     auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
4909     auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
4910     MIRBuilder.buildOr(NextResult, ResultReg, Shl);
4911     ResultReg = NextResult;
4912   }
4913 
4914   if (DstTy.isPointer()) {
4915     if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
4916           DstTy.getAddressSpace())) {
4917       LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
4918       return UnableToLegalize;
4919     }
4920 
4921     MIRBuilder.buildIntToPtr(DstReg, ResultReg);
4922   }
4923 
4924   MI.eraseFromParent();
4925   return Legalized;
4926 }
4927 
4928 LegalizerHelper::LegalizeResult
4929 LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
4930   const unsigned NumDst = MI.getNumOperands() - 1;
4931   Register SrcReg = MI.getOperand(NumDst).getReg();
4932   Register Dst0Reg = MI.getOperand(0).getReg();
4933   LLT DstTy = MRI.getType(Dst0Reg);
4934   if (DstTy.isPointer())
4935     return UnableToLegalize; // TODO
4936 
4937   SrcReg = coerceToScalar(SrcReg);
4938   if (!SrcReg)
4939     return UnableToLegalize;
4940 
4941   // Expand scalarizing unmerge as bitcast to integer and shift.
4942   LLT IntTy = MRI.getType(SrcReg);
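  // The first destination is a plain truncate of the source; every later
  // destination truncates the source shifted right by its bit offset.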
4943 
4944   MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
4945 
4946   const unsigned DstSize = DstTy.getSizeInBits();
4947   unsigned Offset = DstSize;
4948   for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
4949     auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
4950     auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
4951     MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
4952   }
4953 
4954   MI.eraseFromParent();
4955   return Legalized;
4956 }
4957 
4958 LegalizerHelper::LegalizeResult
4959 LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
4960   Register DstReg = MI.getOperand(0).getReg();
4961   Register Src0Reg = MI.getOperand(1).getReg();
4962   Register Src1Reg = MI.getOperand(2).getReg();
4963   LLT Src0Ty = MRI.getType(Src0Reg);
4964   LLT DstTy = MRI.getType(DstReg);
4965   LLT IdxTy = LLT::scalar(32);
4966 
4967   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
4968 
4969   if (DstTy.isScalar()) {
4970     if (Src0Ty.isVector())
4971       return UnableToLegalize;
4972 
4973     // This is just a SELECT.
4974     assert(Mask.size() == 1 && "Expected a single mask element");
4975     Register Val;
4976     if (Mask[0] < 0 || Mask[0] > 1)
4977       Val = MIRBuilder.buildUndef(DstTy).getReg(0);
4978     else
4979       Val = Mask[0] == 0 ? Src0Reg : Src1Reg;
4980     MIRBuilder.buildCopy(DstReg, Val);
4981     MI.eraseFromParent();
4982     return Legalized;
4983   }
4984 
4985   Register Undef;
4986   SmallVector<Register, 32> BuildVec;
4987   LLT EltTy = DstTy.getElementType();
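  // Extract each selected element (or an undef for a negative mask index) and
  // rebuild the destination with G_BUILD_VECTOR.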
4988 
4989   for (int Idx : Mask) {
4990     if (Idx < 0) {
4991       if (!Undef.isValid())
4992         Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
4993       BuildVec.push_back(Undef);
4994       continue;
4995     }
4996 
4997     if (Src0Ty.isScalar()) {
4998       BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
4999     } else {
5000       int NumElts = Src0Ty.getNumElements();
5001       Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
5002       int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
5003       auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
5004       auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
5005       BuildVec.push_back(Extract.getReg(0));
5006     }
5007   }
5008 
5009   MIRBuilder.buildBuildVector(DstReg, BuildVec);
5010   MI.eraseFromParent();
5011   return Legalized;
5012 }
5013 
5014 LegalizerHelper::LegalizeResult
5015 LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
5016   Register Dst = MI.getOperand(0).getReg();
5017   Register AllocSize = MI.getOperand(1).getReg();
5018   Align Alignment = assumeAligned(MI.getOperand(2).getImm());
5019 
5020   const auto &MF = *MI.getMF();
5021   const auto &TLI = *MF.getSubtarget().getTargetLowering();
5022 
5023   LLT PtrTy = MRI.getType(Dst);
5024   LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
5025 
5026   Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
5027   auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
5028   SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
5029 
5030   // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
5031   // have to generate an extra instruction to negate the alloc and then use
5032   // G_PTR_ADD to add the negative offset.
5033   auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
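  // Masking with -Alignment clears the low bits, rounding the new stack
  // pointer down to the requested alignment (this lowering assumes a
  // downward-growing stack).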
5034   if (Alignment > Align(1)) {
5035     APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
5036     AlignMask.negate();
5037     auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
5038     Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
5039   }
5040 
5041   SPTmp = MIRBuilder.buildCast(PtrTy, Alloc);
5042   MIRBuilder.buildCopy(SPReg, SPTmp);
5043   MIRBuilder.buildCopy(Dst, SPTmp);
5044 
5045   MI.eraseFromParent();
5046   return Legalized;
5047 }
5048 
5049 LegalizerHelper::LegalizeResult
5050 LegalizerHelper::lowerExtract(MachineInstr &MI) {
5051   Register Dst = MI.getOperand(0).getReg();
5052   Register Src = MI.getOperand(1).getReg();
5053   unsigned Offset = MI.getOperand(2).getImm();
5054 
5055   LLT DstTy = MRI.getType(Dst);
5056   LLT SrcTy = MRI.getType(Src);
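  // A scalar extract is just a right shift of the (bitcast) source by the bit
  // offset followed by a truncate to the destination type.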
5057 
5058   if (DstTy.isScalar() &&
5059       (SrcTy.isScalar() ||
5060        (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
5061     LLT SrcIntTy = SrcTy;
5062     if (!SrcTy.isScalar()) {
5063       SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
5064       Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0);
5065     }
5066 
5067     if (Offset == 0)
5068       MIRBuilder.buildTrunc(Dst, Src);
5069     else {
5070       auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
5071       auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt);
5072       MIRBuilder.buildTrunc(Dst, Shr);
5073     }
5074 
5075     MI.eraseFromParent();
5076     return Legalized;
5077   }
5078 
5079   return UnableToLegalize;
5080 }
5081 
5082 LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
5083   Register Dst = MI.getOperand(0).getReg();
5084   Register Src = MI.getOperand(1).getReg();
5085   Register InsertSrc = MI.getOperand(2).getReg();
5086   uint64_t Offset = MI.getOperand(3).getImm();
5087 
5088   LLT DstTy = MRI.getType(Src);
5089   LLT InsertTy = MRI.getType(InsertSrc);
5090 
5091   if (InsertTy.isVector() ||
5092       (DstTy.isVector() && DstTy.getElementType() != InsertTy))
5093     return UnableToLegalize;
5094 
5095   const DataLayout &DL = MIRBuilder.getDataLayout();
5096   if ((DstTy.isPointer() &&
5097        DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
5098       (InsertTy.isPointer() &&
5099        DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
5100     LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
5101     return UnableToLegalize;
5102   }
5103 
5104   LLT IntDstTy = DstTy;
5105 
5106   if (!DstTy.isScalar()) {
5107     IntDstTy = LLT::scalar(DstTy.getSizeInBits());
5108     Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
5109   }
5110 
5111   if (!InsertTy.isScalar()) {
5112     const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
5113     InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
5114   }
5115 
5116   Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
5117   if (Offset != 0) {
5118     auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
5119     ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
5120   }
5121 
5122   APInt MaskVal = APInt::getBitsSetWithWrap(
5123       DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
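  // MaskVal covers every bit outside the [Offset, Offset + InsertSize) window,
  // so the AND below preserves the bits of Src that are not being replaced.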
5124 
5125   auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
5126   auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
5127   auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
5128 
5129   MIRBuilder.buildCast(Dst, Or);
5130   MI.eraseFromParent();
5131   return Legalized;
5132 }
5133 
5134 LegalizerHelper::LegalizeResult
5135 LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
5136   Register Dst0 = MI.getOperand(0).getReg();
5137   Register Dst1 = MI.getOperand(1).getReg();
5138   Register LHS = MI.getOperand(2).getReg();
5139   Register RHS = MI.getOperand(3).getReg();
5140   const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
5141 
5142   LLT Ty = MRI.getType(Dst0);
5143   LLT BoolTy = MRI.getType(Dst1);
5144 
5145   if (IsAdd)
5146     MIRBuilder.buildAdd(Dst0, LHS, RHS);
5147   else
5148     MIRBuilder.buildSub(Dst0, LHS, RHS);
5149 
5150   // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
5151 
5152   auto Zero = MIRBuilder.buildConstant(Ty, 0);
5153 
5154   // For an addition, the result should be less than one of the operands (LHS)
5155   // if and only if the other operand (RHS) is negative, otherwise there will
5156   // be overflow.
5157   // For a subtraction, the result should be less than one of the operands
5158   // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
5159   // otherwise there will be overflow.
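  // e.g. for s8 addition, 100 + 100 wraps to -56: the result is less than LHS
  // even though RHS is non-negative, so the XOR below reports overflow.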
5160   auto ResultLowerThanLHS =
5161       MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
5162   auto ConditionRHS = MIRBuilder.buildICmp(
5163       IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
5164 
5165   MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
5166   MI.eraseFromParent();
5167   return Legalized;
5168 }
5169 
5170 LegalizerHelper::LegalizeResult
5171 LegalizerHelper::lowerBswap(MachineInstr &MI) {
5172   Register Dst = MI.getOperand(0).getReg();
5173   Register Src = MI.getOperand(1).getReg();
5174   const LLT Ty = MRI.getType(Src);
5175   unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
5176   unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
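  // e.g. for a 32-bit swap BaseShiftAmt is 24: the code below exchanges bytes
  // 0 and 3 directly, and the loop then swaps bytes 1 and 2.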
5177 
5178   // Swap most and least significant byte, set remaining bytes in Res to zero.
5179   auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
5180   auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
5181   auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
5182   auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
5183 
5184   // Set i-th high/low byte in Res to i-th low/high byte from Src.
5185   for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
5186     // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
    APInt APMask = APInt::getBitsSet(SizeInBytes * 8, i * 8, i * 8 + 8);
5188     auto Mask = MIRBuilder.buildConstant(Ty, APMask);
5189     auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
5190     // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
5191     auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
5192     auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
5193     Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
5194     // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
5195     auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
5196     auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
5197     Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
5198   }
5199   Res.getInstr()->getOperand(0).setReg(Dst);
5200 
5201   MI.eraseFromParent();
5202   return Legalized;
5203 }
5204 
// { (Src & Mask) >> N } | { (Src << N) & Mask }
5206 static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
5207                                  MachineInstrBuilder Src, APInt Mask) {
5208   const LLT Ty = Dst.getLLTTy(*B.getMRI());
5209   MachineInstrBuilder C_N = B.buildConstant(Ty, N);
5210   MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
5211   auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
5212   auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
5213   return B.buildOr(Dst, LHS, RHS);
5214 }
5215 
5216 LegalizerHelper::LegalizeResult
5217 LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
5218   Register Dst = MI.getOperand(0).getReg();
5219   Register Src = MI.getOperand(1).getReg();
5220   const LLT Ty = MRI.getType(Src);
5221   unsigned Size = Ty.getSizeInBits();
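  // Reverse the bytes first, then reverse the bits within each byte by
  // swapping nibbles, then bit pairs, then adjacent bits.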
5222 
5223   MachineInstrBuilder BSWAP =
5224       MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
5225 
5226   // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
5227   //    [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
5228   // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
5229   MachineInstrBuilder Swap4 =
5230       SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
5231 
5232   // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
  //    [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
  // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
5235   MachineInstrBuilder Swap2 =
5236       SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
5237 
5238   // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
  //    [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
  // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
5241   SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
5242 
5243   MI.eraseFromParent();
5244   return Legalized;
5245 }
5246 
5247 LegalizerHelper::LegalizeResult
5248 LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
5249   MachineFunction &MF = MIRBuilder.getMF();
5250   const TargetSubtargetInfo &STI = MF.getSubtarget();
5251   const TargetLowering *TLI = STI.getTargetLowering();
5252 
5253   bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
5254   int NameOpIdx = IsRead ? 1 : 0;
5255   int ValRegIndex = IsRead ? 0 : 1;
5256 
5257   Register ValReg = MI.getOperand(ValRegIndex).getReg();
5258   const LLT Ty = MRI.getType(ValReg);
5259   const MDString *RegStr = cast<MDString>(
5260     cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
5261 
5262   Register PhysReg = TLI->getRegisterByName(RegStr->getString().data(), Ty, MF);
5263   if (!PhysReg.isValid())
5264     return UnableToLegalize;
5265 
5266   if (IsRead)
5267     MIRBuilder.buildCopy(ValReg, PhysReg);
5268   else
5269     MIRBuilder.buildCopy(PhysReg, ValReg);
5270 
5271   MI.eraseFromParent();
5272   return Legalized;
5273 }
5274