1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file implements the LegalizerHelper class to legalize
10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
11 /// primary legalization.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
16 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
17 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
18 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
19 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
20 #include "llvm/CodeGen/MachineRegisterInfo.h"
21 #include "llvm/CodeGen/TargetFrameLowering.h"
22 #include "llvm/CodeGen/TargetInstrInfo.h"
23 #include "llvm/CodeGen/TargetLowering.h"
24 #include "llvm/CodeGen/TargetSubtargetInfo.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Support/raw_ostream.h"
28 
29 #define DEBUG_TYPE "legalizer"
30 
31 using namespace llvm;
32 using namespace LegalizeActions;
33 using namespace MIPatternMatch;
34 
35 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
36 ///
37 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
38 /// with any leftover piece as type \p LeftoverTy
39 ///
40 /// Returns -1 in the first element of the pair if the breakdown is not
41 /// satisfiable.
42 static std::pair<int, int>
43 getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
44   assert(!LeftoverTy.isValid() && "this is an out argument");
45 
46   unsigned Size = OrigTy.getSizeInBits();
47   unsigned NarrowSize = NarrowTy.getSizeInBits();
48   unsigned NumParts = Size / NarrowSize;
49   unsigned LeftoverSize = Size - NumParts * NarrowSize;
50   assert(Size > NarrowSize);
51 
52   if (LeftoverSize == 0)
53     return {NumParts, 0};
54 
55   if (NarrowTy.isVector()) {
56     unsigned EltSize = OrigTy.getScalarSizeInBits();
57     if (LeftoverSize % EltSize != 0)
58       return {-1, -1};
59     LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
60   } else {
61     LeftoverTy = LLT::scalar(LeftoverSize);
62   }
63 
64   int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
65   return std::make_pair(NumParts, NumLeftover);
66 }
67 
68 static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
69 
70   if (!Ty.isScalar())
71     return nullptr;
72 
73   switch (Ty.getSizeInBits()) {
74   case 16:
75     return Type::getHalfTy(Ctx);
76   case 32:
77     return Type::getFloatTy(Ctx);
78   case 64:
79     return Type::getDoubleTy(Ctx);
80   case 80:
81     return Type::getX86_FP80Ty(Ctx);
82   case 128:
83     return Type::getFP128Ty(Ctx);
84   default:
85     return nullptr;
86   }
87 }
88 
// Construct a helper that takes the LegalizerInfo from the function's
// subtarget. The observer is notified of instructions created/erased during
// legalization.
LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &Builder)
    : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()),
      TLI(*MF.getSubtarget().getTargetLowering()) { }
95 
// Construct a helper with an explicitly supplied LegalizerInfo, allowing the
// caller to override the subtarget's default rules.
LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &B)
  : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
    TLI(*MF.getSubtarget().getTargetLowering()) { }
101 
// Perform one legalization step on MI: query the LegalizerInfo for the action
// to take and dispatch to the matching strategy. Returns AlreadyLegal,
// Legalized, or UnableToLegalize.
LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
  LLVM_DEBUG(dbgs() << "Legalizing: " << MI);

  // Place new instructions at MI, inheriting its debug location.
  MIRBuilder.setInstrAndDebugLoc(MI);

  // Intrinsics bypass the action table entirely; the target decides.
  if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
      MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
    return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case Legal:
    LLVM_DEBUG(dbgs() << ".. Already legal\n");
    return AlreadyLegal;
  case Libcall:
    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
    return libcall(MI);
  case NarrowScalar:
    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
    return narrowScalar(MI, Step.TypeIdx, Step.NewType);
  case WidenScalar:
    LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
    return widenScalar(MI, Step.TypeIdx, Step.NewType);
  case Bitcast:
    LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
    return bitcast(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    LLVM_DEBUG(dbgs() << ".. Lower\n");
    return lower(MI, Step.TypeIdx, Step.NewType);
  case FewerElements:
    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
  case MoreElements:
    LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
    return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
  case Custom:
    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
    return LI.legalizeCustom(*this, MI) ? Legalized : UnableToLegalize;
  default:
    LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
    return UnableToLegalize;
  }
}
145 
146 void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
147                                    SmallVectorImpl<Register> &VRegs) {
148   for (int i = 0; i < NumParts; ++i)
149     VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
150   MIRBuilder.buildUnmerge(VRegs, Reg);
151 }
152 
// Split Reg (of type RegTy) into as many MainTy pieces as fit, plus leftover
// pieces covering any remainder. Main pieces go to VRegs, leftover pieces to
// LeftoverRegs, and the leftover type is reported through LeftoverTy.
// Returns false if no valid leftover type exists for the remainder.
bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
                                   LLT MainTy, LLT &LeftoverTy,
                                   SmallVectorImpl<Register> &VRegs,
                                   SmallVectorImpl<Register> &LeftoverRegs) {
  assert(!LeftoverTy.isValid() && "this is an out argument");

  unsigned RegSize = RegTy.getSizeInBits();
  unsigned MainSize = MainTy.getSizeInBits();
  unsigned NumParts = RegSize / MainSize;
  unsigned LeftoverSize = RegSize - NumParts * MainSize;

  // Use an unmerge when possible.
  if (LeftoverSize == 0) {
    for (unsigned I = 0; I < NumParts; ++I)
      VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
    MIRBuilder.buildUnmerge(VRegs, Reg);
    return true;
  }

  if (MainTy.isVector()) {
    // The leftover must be a whole number of vector elements.
    unsigned EltSize = MainTy.getScalarSizeInBits();
    if (LeftoverSize % EltSize != 0)
      return false;
    LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
  } else {
    LeftoverTy = LLT::scalar(LeftoverSize);
  }

  // For irregular sizes, extract the individual parts.
  for (unsigned I = 0; I != NumParts; ++I) {
    Register NewReg = MRI.createGenericVirtualRegister(MainTy);
    VRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
  }

  // Extract the remaining bits as one or more leftover-typed pieces starting
  // where the main parts ended.
  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
       Offset += LeftoverSize) {
    Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
    LeftoverRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, Offset);
  }

  return true;
}
197 
// Reassemble DstReg (of type ResultTy) from PartTy-typed pieces in PartRegs
// plus optional LeftoverTy-typed pieces in LeftoverRegs. This is the inverse
// of the leftover-aware extractParts above.
void LegalizerHelper::insertParts(Register DstReg,
                                  LLT ResultTy, LLT PartTy,
                                  ArrayRef<Register> PartRegs,
                                  LLT LeftoverTy,
                                  ArrayRef<Register> LeftoverRegs) {
  // With no leftover pieces a single merge-like instruction suffices.
  if (!LeftoverTy.isValid()) {
    assert(LeftoverRegs.empty());

    if (!ResultTy.isVector()) {
      MIRBuilder.buildMerge(DstReg, PartRegs);
      return;
    }

    // Vector result: concat sub-vectors, or build from scalar elements.
    if (PartTy.isVector())
      MIRBuilder.buildConcatVectors(DstReg, PartRegs);
    else
      MIRBuilder.buildBuildVector(DstReg, PartRegs);
    return;
  }

  unsigned PartSize = PartTy.getSizeInBits();
  unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();

  // Mixed piece sizes: start from undef and insert each piece at its bit
  // offset, threading the partial result through a chain of G_INSERTs.
  Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
  MIRBuilder.buildUndef(CurResultReg);

  unsigned Offset = 0;
  for (Register PartReg : PartRegs) {
    Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
    MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
    CurResultReg = NewResultReg;
    Offset += PartSize;
  }

  for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
    // Use the original output register for the final insert to avoid a copy.
    Register NewResultReg = (I + 1 == E) ?
      DstReg : MRI.createGenericVirtualRegister(ResultTy);

    MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
    CurResultReg = NewResultReg;
    Offset += LeftoverPartSize;
  }
}
242 
243 /// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
244 static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
245                               const MachineInstr &MI) {
246   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
247 
248   const int StartIdx = Regs.size();
249   const int NumResults = MI.getNumOperands() - 1;
250   Regs.resize(Regs.size() + NumResults);
251   for (int I = 0; I != NumResults; ++I)
252     Regs[StartIdx + I] = MI.getOperand(I).getReg();
253 }
254 
255 void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
256                                      LLT GCDTy, Register SrcReg) {
257   LLT SrcTy = MRI.getType(SrcReg);
258   if (SrcTy == GCDTy) {
259     // If the source already evenly divides the result type, we don't need to do
260     // anything.
261     Parts.push_back(SrcReg);
262   } else {
263     // Need to split into common type sized pieces.
264     auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
265     getUnmergeResults(Parts, *Unmerge);
266   }
267 }
268 
269 LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
270                                     LLT NarrowTy, Register SrcReg) {
271   LLT SrcTy = MRI.getType(SrcReg);
272   LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
273   extractGCDType(Parts, GCDTy, SrcReg);
274   return GCDTy;
275 }
276 
// Merge the GCDTy-typed pieces in VRegs up to NarrowTy-typed pieces covering
// the LCM of DstTy and NarrowTy, padding missing high pieces according to
// PadStrategy (G_ZEXT: zeros, G_ANYEXT: undef, G_SEXT: sign bits). On return
// VRegs holds the NarrowTy pieces; the LCM type is returned so the caller can
// extract DstTy from the remerged value.
LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
                                         SmallVectorImpl<Register> &VRegs,
                                         unsigned PadStrategy) {
  LLT LCMTy = getLCMType(DstTy, NarrowTy);

  int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
  int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
  int NumOrigSrc = VRegs.size();

  Register PadReg;

  // Get a value we can use to pad the source value if the sources won't evenly
  // cover the result type.
  if (NumOrigSrc < NumParts * NumSubParts) {
    if (PadStrategy == TargetOpcode::G_ZEXT)
      PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
    else if (PadStrategy == TargetOpcode::G_ANYEXT)
      PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    else {
      assert(PadStrategy == TargetOpcode::G_SEXT);

      // Shift the sign bit of the low register through the high register.
      auto ShiftAmt =
        MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
      PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
    }
  }

  // Registers for the final merge to be produced.
  SmallVector<Register, 4> Remerge(NumParts);

  // Registers needed for intermediate merges, which will be merged into a
  // source for Remerge.
  SmallVector<Register, 4> SubMerge(NumSubParts);

  // Once we've fully read off the end of the original source bits, we can reuse
  // the same high bits for remaining padding elements.
  Register AllPadReg;

  // Build merges to the LCM type to cover the original result type.
  for (int I = 0; I != NumParts; ++I) {
    bool AllMergePartsArePadding = true;

    // Build the requested merges to the requested type.
    for (int J = 0; J != NumSubParts; ++J) {
      int Idx = I * NumSubParts + J;
      if (Idx >= NumOrigSrc) {
        // Past the end of the source pieces: substitute the pad value.
        SubMerge[J] = PadReg;
        continue;
      }

      SubMerge[J] = VRegs[Idx];

      // There are meaningful bits here we can't reuse later.
      AllMergePartsArePadding = false;
    }

    // If we've filled up a complete piece with padding bits, we can directly
    // emit the natural sized constant if applicable, rather than a merge of
    // smaller constants.
    if (AllMergePartsArePadding && !AllPadReg) {
      if (PadStrategy == TargetOpcode::G_ANYEXT)
        AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
      else if (PadStrategy == TargetOpcode::G_ZEXT)
        AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);

      // If this is a sign extension, we can't materialize a trivial constant
      // with the right type and have to produce a merge.
    }

    if (AllPadReg) {
      // Avoid creating additional instructions if we're just adding additional
      // copies of padding bits.
      Remerge[I] = AllPadReg;
      continue;
    }

    if (NumSubParts == 1)
      Remerge[I] = SubMerge[0];
    else
      Remerge[I] = MIRBuilder.buildMerge(NarrowTy, SubMerge).getReg(0);

    // In the sign extend padding case, re-use the first all-signbit merge.
    if (AllMergePartsArePadding && !AllPadReg)
      AllPadReg = Remerge[I];
  }

  VRegs = std::move(Remerge);
  return LCMTy;
}
367 
// Merge RemergeRegs into an LCMTy-sized value and write the low DstReg-sized
// portion into DstReg. Counterpart to buildLCMMergePieces.
void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
                                               ArrayRef<Register> RemergeRegs) {
  LLT DstTy = MRI.getType(DstReg);

  // Create the merge to the widened source, and extract the relevant bits into
  // the result.

  // Exact match: merge directly into the destination.
  if (DstTy == LCMTy) {
    MIRBuilder.buildMerge(DstReg, RemergeRegs);
    return;
  }

  auto Remerge = MIRBuilder.buildMerge(LCMTy, RemergeRegs);
  if (DstTy.isScalar() && LCMTy.isScalar()) {
    // Scalar result: simply truncate away the padding bits.
    MIRBuilder.buildTrunc(DstReg, Remerge);
    return;
  }

  if (LCMTy.isVector()) {
    // Vector LCM: unmerge into DstTy pieces; only the first piece (the low
    // bits) is the real result, the rest are discarded scratch registers.
    unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
    SmallVector<Register, 8> UnmergeDefs(NumDefs);
    UnmergeDefs[0] = DstReg;
    for (unsigned I = 1; I != NumDefs; ++I)
      UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);

    MIRBuilder.buildUnmerge(UnmergeDefs,
                            MIRBuilder.buildMerge(LCMTy, RemergeRegs));
    return;
  }

  llvm_unreachable("unhandled case");
}
400 
/// Map a generic opcode plus a scalar size in bits to the corresponding
/// RTLIB libcall enumerator. Integer opcodes support 32/64/128-bit sizes;
/// floating-point opcodes additionally support 80-bit (x86 long double).
/// Asserts on an unsupported size or opcode.
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
// Expand to a size-dispatching switch for integer-width libcalls.
#define RTLIBCASE_INT(LibcallPrefix)                                           \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

// Same as RTLIBCASE_INT but also accepts the 80-bit FP size.
#define RTLIBCASE(LibcallPrefix)                                               \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 80:                                                                   \
      return RTLIB::LibcallPrefix##80;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

  switch (Opcode) {
  case TargetOpcode::G_SDIV:
    RTLIBCASE_INT(SDIV_I);
  case TargetOpcode::G_UDIV:
    RTLIBCASE_INT(UDIV_I);
  case TargetOpcode::G_SREM:
    RTLIBCASE_INT(SREM_I);
  case TargetOpcode::G_UREM:
    RTLIBCASE_INT(UREM_I);
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    RTLIBCASE_INT(CTLZ_I);
  case TargetOpcode::G_FADD:
    RTLIBCASE(ADD_F);
  case TargetOpcode::G_FSUB:
    RTLIBCASE(SUB_F);
  case TargetOpcode::G_FMUL:
    RTLIBCASE(MUL_F);
  case TargetOpcode::G_FDIV:
    RTLIBCASE(DIV_F);
  case TargetOpcode::G_FEXP:
    RTLIBCASE(EXP_F);
  case TargetOpcode::G_FEXP2:
    RTLIBCASE(EXP2_F);
  case TargetOpcode::G_FREM:
    RTLIBCASE(REM_F);
  case TargetOpcode::G_FPOW:
    RTLIBCASE(POW_F);
  case TargetOpcode::G_FMA:
    RTLIBCASE(FMA_F);
  case TargetOpcode::G_FSIN:
    RTLIBCASE(SIN_F);
  case TargetOpcode::G_FCOS:
    RTLIBCASE(COS_F);
  case TargetOpcode::G_FLOG10:
    RTLIBCASE(LOG10_F);
  case TargetOpcode::G_FLOG:
    RTLIBCASE(LOG_F);
  case TargetOpcode::G_FLOG2:
    RTLIBCASE(LOG2_F);
  case TargetOpcode::G_FCEIL:
    RTLIBCASE(CEIL_F);
  case TargetOpcode::G_FFLOOR:
    RTLIBCASE(FLOOR_F);
  case TargetOpcode::G_FMINNUM:
    RTLIBCASE(FMIN_F);
  case TargetOpcode::G_FMAXNUM:
    RTLIBCASE(FMAX_F);
  case TargetOpcode::G_FSQRT:
    RTLIBCASE(SQRT_F);
  case TargetOpcode::G_FRINT:
    RTLIBCASE(RINT_F);
  case TargetOpcode::G_FNEARBYINT:
    RTLIBCASE(NEARBYINT_F);
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
    RTLIBCASE(ROUNDEVEN_F);
  }
  llvm_unreachable("Unknown libcall function");
}
490 
/// True if an instruction is in tail position in its caller. Intended for
/// legalizing libcalls as tail calls when possible.
static bool isLibCallInTailPosition(const TargetInstrInfo &TII,
                                    MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const Function &F = MBB.getParent()->getFunction();

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore NoAlias and NonNull because they don't affect the
  // call sequence.
  AttributeList CallerAttrs = F.getAttributes();
  if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
          .removeAttribute(Attribute::NoAlias)
          .removeAttribute(Attribute::NonNull)
          .hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
      CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
    return false;

  // Only tail call if the following instruction is a standard return.
  // Skip over debug instructions; reject if we fall off the block, if the
  // next instruction is itself a tail call, or if it is not a plain return.
  auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
  if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
    return false;

  return true;
}
520 
521 LegalizerHelper::LegalizeResult
522 llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
523                     const CallLowering::ArgInfo &Result,
524                     ArrayRef<CallLowering::ArgInfo> Args,
525                     const CallingConv::ID CC) {
526   auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
527 
528   CallLowering::CallLoweringInfo Info;
529   Info.CallConv = CC;
530   Info.Callee = MachineOperand::CreateES(Name);
531   Info.OrigRet = Result;
532   std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
533   if (!CLI.lowerCall(MIRBuilder, Info))
534     return LegalizerHelper::UnableToLegalize;
535 
536   return LegalizerHelper::Legalized;
537 }
538 
539 LegalizerHelper::LegalizeResult
540 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
541                     const CallLowering::ArgInfo &Result,
542                     ArrayRef<CallLowering::ArgInfo> Args) {
543   auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
544   const char *Name = TLI.getLibcallName(Libcall);
545   const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
546   return createLibcall(MIRBuilder, Name, Result, Args, CC);
547 }
548 
549 // Useful for libcalls where all operands have the same type.
550 static LegalizerHelper::LegalizeResult
551 simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
552               Type *OpType) {
553   auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
554 
555   SmallVector<CallLowering::ArgInfo, 3> Args;
556   for (unsigned i = 1; i < MI.getNumOperands(); i++)
557     Args.push_back({MI.getOperand(i).getReg(), OpType});
558   return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
559                        Args);
560 }
561 
562 LegalizerHelper::LegalizeResult
563 llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
564                        MachineInstr &MI) {
565   auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
566 
567   SmallVector<CallLowering::ArgInfo, 3> Args;
568   // Add all the args, except for the last which is an imm denoting 'tail'.
569   for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
570     Register Reg = MI.getOperand(i).getReg();
571 
572     // Need derive an IR type for call lowering.
573     LLT OpLLT = MRI.getType(Reg);
574     Type *OpTy = nullptr;
575     if (OpLLT.isPointer())
576       OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
577     else
578       OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
579     Args.push_back({Reg, OpTy});
580   }
581 
582   auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
583   auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
584   RTLIB::Libcall RTLibcall;
585   switch (MI.getOpcode()) {
586   case TargetOpcode::G_MEMCPY:
587     RTLibcall = RTLIB::MEMCPY;
588     break;
589   case TargetOpcode::G_MEMMOVE:
590     RTLibcall = RTLIB::MEMMOVE;
591     break;
592   case TargetOpcode::G_MEMSET:
593     RTLibcall = RTLIB::MEMSET;
594     break;
595   default:
596     return LegalizerHelper::UnableToLegalize;
597   }
598   const char *Name = TLI.getLibcallName(RTLibcall);
599 
600   CallLowering::CallLoweringInfo Info;
601   Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
602   Info.Callee = MachineOperand::CreateES(Name);
603   Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
604   Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
605                     isLibCallInTailPosition(MIRBuilder.getTII(), MI);
606 
607   std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
608   if (!CLI.lowerCall(MIRBuilder, Info))
609     return LegalizerHelper::UnableToLegalize;
610 
611   if (Info.LoweredTailCall) {
612     assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
613     // We must have a return following the call (or debug insts) to get past
614     // isLibCallInTailPosition.
615     do {
616       MachineInstr *Next = MI.getNextNode();
617       assert(Next && (Next->isReturn() || Next->isDebugInstr()) &&
618              "Expected instr following MI to be return or debug inst?");
619       // We lowered a tail call, so the call is now the return from the block.
620       // Delete the old return.
621       Next->eraseFromParent();
622     } while (MI.getNextNode());
623   }
624 
625   return LegalizerHelper::Legalized;
626 }
627 
628 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
629                                        Type *FromType) {
630   auto ToMVT = MVT::getVT(ToType);
631   auto FromMVT = MVT::getVT(FromType);
632 
633   switch (Opcode) {
634   case TargetOpcode::G_FPEXT:
635     return RTLIB::getFPEXT(FromMVT, ToMVT);
636   case TargetOpcode::G_FPTRUNC:
637     return RTLIB::getFPROUND(FromMVT, ToMVT);
638   case TargetOpcode::G_FPTOSI:
639     return RTLIB::getFPTOSINT(FromMVT, ToMVT);
640   case TargetOpcode::G_FPTOUI:
641     return RTLIB::getFPTOUINT(FromMVT, ToMVT);
642   case TargetOpcode::G_SITOFP:
643     return RTLIB::getSINTTOFP(FromMVT, ToMVT);
644   case TargetOpcode::G_UITOFP:
645     return RTLIB::getUINTTOFP(FromMVT, ToMVT);
646   }
647   llvm_unreachable("Unsupported libcall function");
648 }
649 
650 static LegalizerHelper::LegalizeResult
651 conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
652                   Type *FromType) {
653   RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
654   return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
655                        {{MI.getOperand(1).getReg(), FromType}});
656 }
657 
// Replace MI with an equivalent runtime-library call. Dispatches on opcode to
// the simple/conversion/memory libcall emitters and erases MI on success.
LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI) {
  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = LLTy.getSizeInBits();
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  // Integer ops: all operands share an iN type of the result's width.
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    Type *HLTy = IntegerType::get(Ctx, Size);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  // FP ops: all operands share the float type matching the result's width.
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
    Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
    if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
      LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
      return UnableToLegalize;
    }
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  // FP-to-FP conversions: need float types for both sides.
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC: {
    Type *FromTy = getFloatTypeForLLT(Ctx,  MRI.getType(MI.getOperand(1).getReg()));
    Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
    if (!FromTy || !ToTy)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy );
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
        FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
        FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  // Memory intrinsics erase MI themselves regardless of the result.
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET: {
    LegalizeResult Result = createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI);
    MI.eraseFromParent();
    return Result;
  }
  }

  MI.eraseFromParent();
  return Legalized;
}
763 
/// Legalize \p MI by replacing the too-wide scalar type at type index
/// \p TypeIdx with operations on the narrower scalar type \p NarrowTy.
/// Returns UnableToLegalize for opcodes or configurations this helper does
/// not know how to break down.
LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
                                                              unsigned TypeIdx,
                                                              LLT NarrowTy) {
  // Size of the result (operand 0) and of one narrow chunk, in bits.
  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_IMPLICIT_DEF: {
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);

    // If SizeOp0 is not an exact multiple of NarrowSize, emit
    // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
    // FIXME: Although this would also be legal for the general case, it causes
    //  a lot of regressions in the emitted code (superfluous COPYs, artifact
    //  combines not being hit). This seems to be a problem related to the
    //  artifact combiner.
    if (SizeOp0 % NarrowSize != 0) {
      LLT ImplicitTy = NarrowTy;
      if (DstTy.isVector())
        ImplicitTy = LLT::vector(DstTy.getNumElements(), ImplicitTy);

      Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
      MIRBuilder.buildAnyExt(DstReg, ImplicitReg);

      MI.eraseFromParent();
      return Legalized;
    }

    // Exact multiple: build NumParts narrow undefs and recombine them.
    int NumParts = SizeOp0 / NarrowSize;

    SmallVector<Register, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i)
      DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));

    if (DstTy.isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    // Split the constant value into NarrowSize-bit slices (plus a smaller
    // leftover slice if the sizes don't divide evenly) and re-merge.
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    const APInt &Val = MI.getOperand(1).getCImm()->getValue();
    unsigned TotalSize = Ty.getSizeInBits();
    unsigned NarrowSize = NarrowTy.getSizeInBits();
    int NumParts = TotalSize / NarrowSize;

    SmallVector<Register, 4> PartRegs;
    for (int I = 0; I != NumParts; ++I) {
      unsigned Offset = I * NarrowSize;
      auto K = MIRBuilder.buildConstant(NarrowTy,
                                        Val.lshr(Offset).trunc(NarrowSize));
      PartRegs.push_back(K.getReg(0));
    }

    LLT LeftoverTy;
    unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
    SmallVector<Register, 1> LeftoverRegs;
    if (LeftoverBits != 0) {
      LeftoverTy = LLT::scalar(LeftoverBits);
      auto K = MIRBuilder.buildConstant(
        LeftoverTy,
        Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
      LeftoverRegs.push_back(K.getReg(0));
    }

    insertParts(MI.getOperand(0).getReg(),
                Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    return narrowScalarExt(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_TRUNC: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    // Only the exact-half case is handled: unmerge the source into two
    // narrow halves and keep the low one.
    uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
      LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
      return UnableToLegalize;
    }

    auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
    MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
    MI.eraseFromParent();
    return Legalized;
  }

  case TargetOpcode::G_FREEZE:
    return reduceOperationWidth(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
    return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_UMULH:
    return narrowScalarMul(MI, NarrowTy);
  case TargetOpcode::G_EXTRACT:
    return narrowScalarExtract(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_INSERT:
    return narrowScalarInsert(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_LOAD: {
    auto &MMO = **MI.memoperands_begin();
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy.isVector())
      return UnableToLegalize;

    // Extending load disguised as a plain load (memory size smaller than the
    // result): load into the narrow type, then any-extend to the result.
    if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
      Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildLoad(TmpReg, MI.getOperand(1), MMO);
      MIRBuilder.buildAnyExt(DstReg, TmpReg);
      MI.eraseFromParent();
      return Legalized;
    }

    return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
  }
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD: {
    bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
    Register DstReg = MI.getOperand(0).getReg();
    Register PtrReg = MI.getOperand(1).getReg();

    Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
    auto &MMO = **MI.memoperands_begin();
    unsigned MemSize = MMO.getSizeInBits();

    if (MemSize == NarrowSize) {
      // Memory exactly fills the narrow type: a plain load suffices.
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
    } else if (MemSize < NarrowSize) {
      // Still an extending load, just into the narrow type.
      MIRBuilder.buildLoadInstr(MI.getOpcode(), TmpReg, PtrReg, MMO);
    } else if (MemSize > NarrowSize) {
      // FIXME: Need to split the load.
      return UnableToLegalize;
    }

    // Re-extend the narrow value to the original destination width.
    if (ZExt)
      MIRBuilder.buildZExt(DstReg, TmpReg);
    else
      MIRBuilder.buildSExt(DstReg, TmpReg);

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    const auto &MMO = **MI.memoperands_begin();

    Register SrcReg = MI.getOperand(0).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    if (SrcTy.isVector())
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;
    unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
    unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
    // NOTE(review): SrcTy.isVector() was already rejected above, so this
    // condition can never be true here — looks like dead code.
    if (SrcTy.isVector() && LeftoverBits != 0)
      return UnableToLegalize;

    // Truncating store disguised as a plain store (memory size smaller than
    // the value): truncate to the narrow type first, then store that.
    if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
      Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      // NOTE(review): this re-binding shadows the outer MMO and is redundant.
      auto &MMO = **MI.memoperands_begin();
      MIRBuilder.buildTrunc(TmpReg, SrcReg);
      MIRBuilder.buildStore(TmpReg, MI.getOperand(1), MMO);
      MI.eraseFromParent();
      return Legalized;
    }

    return reduceLoadStoreWidth(MI, 0, NarrowTy);
  }
  case TargetOpcode::G_SELECT:
    return narrowScalarSelect(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // Legalize bitwise operation:
    // A = BinOp<Ty> B, C
    // into:
    // B1, ..., BN = G_UNMERGE_VALUES B
    // C1, ..., CN = G_UNMERGE_VALUES C
    // A1 = BinOp<Ty/N> B1, C1
    // ...
    // AN = BinOp<Ty/N> BN, CN
    // A = G_MERGE_VALUES A1, ..., AN
    return narrowScalarBasic(MI, TypeIdx, NarrowTy);
  }
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    return narrowScalarShift(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP:
    // TypeIdx 1 is the (wide) source operand; dispatch to the per-opcode
    // narrowing helpers.
    if (TypeIdx == 1)
      switch (MI.getOpcode()) {
      case TargetOpcode::G_CTLZ:
      case TargetOpcode::G_CTLZ_ZERO_UNDEF:
        return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
      case TargetOpcode::G_CTTZ:
      case TargetOpcode::G_CTTZ_ZERO_UNDEF:
        return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
      case TargetOpcode::G_CTPOP:
        return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
      default:
        return UnableToLegalize;
      }

    // TypeIdx 0: the count result always fits in the narrow type, so just
    // narrow the destination and zero-extend it back.
    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INTTOPTR:
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRTOINT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PHI: {
    // Split each incoming value in its predecessor block, build one narrow
    // phi per part, then re-merge the parts after the phis.
    unsigned NumParts = SizeOp0 / NarrowSize;
    SmallVector<Register, 2> DstRegs(NumParts);
    SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
    Observer.changingInstr(MI);
    for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
      // The unmerge of the incoming value must live in the predecessor.
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
      extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
                   SrcRegs[i / 2]);
    }
    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, MI);
    for (unsigned i = 0; i < NumParts; ++i) {
      DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
      MachineInstrBuilder MIB =
          MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
      for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
        MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
    }
    // The merge must come after all phis in the block.
    MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
    MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
    Observer.changedInstr(MI);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    if (TypeIdx != 2)
      return UnableToLegalize;

    // Narrow only the index operand (operand 2 for extract, 3 for insert).
    int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, OpIdx);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_ICMP: {
    // Only the exact-half split is supported.
    uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
    if (NarrowSize * 2 != SrcSize)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
    Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2));

    Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
    Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3));

    CmpInst::Predicate Pred =
        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
    LLT ResTy = MRI.getType(MI.getOperand(0).getReg());

    if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
      // (in)equality: the halves are (un)equal iff (xorL | xorH) is zero.
      MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
      MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
      MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
      MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
      MIRBuilder.buildICmp(Pred, MI.getOperand(0), Or, Zero);
    } else {
      // Ordered compare: the high halves decide unless they're equal, in
      // which case the low halves decide via the unsigned predicate.
      MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
      MachineInstrBuilder CmpHEQ =
          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
      MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
          ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
      MIRBuilder.buildSelect(MI.getOperand(0), CmpHEQ, CmpLU, CmpH);
    }
    Observer.changedInstr(MI);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SEXT_INREG: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    int64_t SizeInBits = MI.getOperand(2).getImm();

    // So long as the new type has more bits than the bits we're extending we
    // don't need to break it apart.
    if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
      Observer.changingInstr(MI);
      // We don't lose any non-extension bits by truncating the src and
      // sign-extending the dst.
      MachineOperand &MO1 = MI.getOperand(1);
      auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
      MO1.setReg(TruncMIB.getReg(0));

      MachineOperand &MO2 = MI.getOperand(0);
      Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
      MIRBuilder.buildSExt(MO2, DstExt);
      MO2.setReg(DstExt);
      Observer.changedInstr(MI);
      return Legalized;
    }

    // Break it apart. Components below the extension point are unmodified. The
    // component containing the extension point becomes a narrower SEXT_INREG.
    // Components above it are ashr'd from the component containing the
    // extension point.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    // List the registers where the destination will be scattered.
    SmallVector<Register, 2> DstRegs;
    // List the registers where the source will be split.
    SmallVector<Register, 2> SrcRegs;

    // Create all the temporary registers.
    for (int i = 0; i < NumParts; ++i) {
      Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);

      SrcRegs.push_back(SrcReg);
    }

    // Explode the big arguments into smaller chunks.
    MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));

    Register AshrCstReg =
        MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
            .getReg(0);
    Register FullExtensionReg = 0;
    Register PartialExtensionReg = 0;

    // Do the operation on each small part.
    for (int i = 0; i < NumParts; ++i) {
      if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
        DstRegs.push_back(SrcRegs[i]);
      else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
        assert(PartialExtensionReg &&
               "Expected to visit partial extension before full");
        if (FullExtensionReg) {
          // All-sign-bit parts above the first are identical; reuse it.
          DstRegs.push_back(FullExtensionReg);
          continue;
        }
        DstRegs.push_back(
            MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
                .getReg(0));
        FullExtensionReg = DstRegs.back();
      } else {
        // NOTE(review): if SizeInBits is an exact multiple of the narrow
        // width, this modulo yields a zero-width SEXT_INREG — verify callers
        // can't reach that configuration.
        DstRegs.push_back(
            MIRBuilder
                .buildInstr(
                    TargetOpcode::G_SEXT_INREG, {NarrowTy},
                    {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
                .getReg(0));
        PartialExtensionReg = DstRegs.back();
      }
    }

    // Gather the destination registers into the final destination.
    Register DstReg = MI.getOperand(0).getReg();
    MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_BSWAP:
  case TargetOpcode::G_BITREVERSE: {
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    SmallVector<Register, 2> SrcRegs, DstRegs;
    unsigned NumParts = SizeOp0 / NarrowSize;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    // Apply the operation to each part and reassemble in reversed part
    // order — both bswap and bitreverse reverse the order of sub-chunks.
    for (unsigned i = 0; i < NumParts; ++i) {
      auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
                                           {SrcRegs[NumParts - 1 - i]});
      DstRegs.push_back(DstPart.getReg(0));
    }

    MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);

    Observer.changedInstr(MI);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_PTRMASK: {
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, 2);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FPTOUI: {
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FPTOSI: {
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FPEXT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
}
1215 
1216 Register LegalizerHelper::coerceToScalar(Register Val) {
1217   LLT Ty = MRI.getType(Val);
1218   if (Ty.isScalar())
1219     return Val;
1220 
1221   const DataLayout &DL = MIRBuilder.getDataLayout();
1222   LLT NewTy = LLT::scalar(Ty.getSizeInBits());
1223   if (Ty.isPointer()) {
1224     if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
1225       return Register();
1226     return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
1227   }
1228 
1229   Register NewVal = Val;
1230 
1231   assert(Ty.isVector());
1232   LLT EltTy = Ty.getElementType();
1233   if (EltTy.isPointer())
1234     NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
1235   return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
1236 }
1237 
1238 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
1239                                      unsigned OpIdx, unsigned ExtOpcode) {
1240   MachineOperand &MO = MI.getOperand(OpIdx);
1241   auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
1242   MO.setReg(ExtB.getReg(0));
1243 }
1244 
1245 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
1246                                       unsigned OpIdx) {
1247   MachineOperand &MO = MI.getOperand(OpIdx);
1248   auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
1249   MO.setReg(ExtB.getReg(0));
1250 }
1251 
1252 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
1253                                      unsigned OpIdx, unsigned TruncOpcode) {
1254   MachineOperand &MO = MI.getOperand(OpIdx);
1255   Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1256   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1257   MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
1258   MO.setReg(DstExt);
1259 }
1260 
1261 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
1262                                       unsigned OpIdx, unsigned ExtOpcode) {
1263   MachineOperand &MO = MI.getOperand(OpIdx);
1264   Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
1265   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1266   MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
1267   MO.setReg(DstTrunc);
1268 }
1269 
1270 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
1271                                             unsigned OpIdx) {
1272   MachineOperand &MO = MI.getOperand(OpIdx);
1273   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1274   MO.setReg(widenWithUnmerge(WideTy, MO.getReg()));
1275 }
1276 
1277 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
1278                                             unsigned OpIdx) {
1279   MachineOperand &MO = MI.getOperand(OpIdx);
1280 
1281   LLT OldTy = MRI.getType(MO.getReg());
1282   unsigned OldElts = OldTy.getNumElements();
1283   unsigned NewElts = MoreTy.getNumElements();
1284 
1285   unsigned NumParts = NewElts / OldElts;
1286 
1287   // Use concat_vectors if the result is a multiple of the number of elements.
1288   if (NumParts * OldElts == NewElts) {
1289     SmallVector<Register, 8> Parts;
1290     Parts.push_back(MO.getReg());
1291 
1292     Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
1293     for (unsigned I = 1; I != NumParts; ++I)
1294       Parts.push_back(ImpDef);
1295 
1296     auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
1297     MO.setReg(Concat.getReg(0));
1298     return;
1299   }
1300 
1301   Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
1302   Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
1303   MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
1304   MO.setReg(MoreReg);
1305 }
1306 
1307 void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1308   MachineOperand &Op = MI.getOperand(OpIdx);
1309   Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
1310 }
1311 
1312 void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1313   MachineOperand &MO = MI.getOperand(OpIdx);
1314   Register CastDst = MRI.createGenericVirtualRegister(CastTy);
1315   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1316   MIRBuilder.buildBitcast(MO, CastDst);
1317   MO.setReg(CastDst);
1318 }
1319 
1320 LegalizerHelper::LegalizeResult
1321 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
1322                                         LLT WideTy) {
1323   if (TypeIdx != 1)
1324     return UnableToLegalize;
1325 
1326   Register DstReg = MI.getOperand(0).getReg();
1327   LLT DstTy = MRI.getType(DstReg);
1328   if (DstTy.isVector())
1329     return UnableToLegalize;
1330 
1331   Register Src1 = MI.getOperand(1).getReg();
1332   LLT SrcTy = MRI.getType(Src1);
1333   const int DstSize = DstTy.getSizeInBits();
1334   const int SrcSize = SrcTy.getSizeInBits();
1335   const int WideSize = WideTy.getSizeInBits();
1336   const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1337 
1338   unsigned NumOps = MI.getNumOperands();
1339   unsigned NumSrc = MI.getNumOperands() - 1;
1340   unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1341 
1342   if (WideSize >= DstSize) {
1343     // Directly pack the bits in the target type.
1344     Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
1345 
1346     for (unsigned I = 2; I != NumOps; ++I) {
1347       const unsigned Offset = (I - 1) * PartSize;
1348 
1349       Register SrcReg = MI.getOperand(I).getReg();
1350       assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
1351 
1352       auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
1353 
1354       Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
1355         MRI.createGenericVirtualRegister(WideTy);
1356 
1357       auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
1358       auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
1359       MIRBuilder.buildOr(NextResult, ResultReg, Shl);
1360       ResultReg = NextResult;
1361     }
1362 
1363     if (WideSize > DstSize)
1364       MIRBuilder.buildTrunc(DstReg, ResultReg);
1365     else if (DstTy.isPointer())
1366       MIRBuilder.buildIntToPtr(DstReg, ResultReg);
1367 
1368     MI.eraseFromParent();
1369     return Legalized;
1370   }
1371 
1372   // Unmerge the original values to the GCD type, and recombine to the next
1373   // multiple greater than the original type.
1374   //
1375   // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1376   // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1377   // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1378   // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1379   // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1380   // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1381   // %12:_(s12) = G_MERGE_VALUES %10, %11
1382   //
1383   // Padding with undef if necessary:
1384   //
1385   // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1386   // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1387   // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1388   // %7:_(s2) = G_IMPLICIT_DEF
1389   // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1390   // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1391   // %10:_(s12) = G_MERGE_VALUES %8, %9
1392 
1393   const int GCD = greatestCommonDivisor(SrcSize, WideSize);
1394   LLT GCDTy = LLT::scalar(GCD);
1395 
1396   SmallVector<Register, 8> Parts;
1397   SmallVector<Register, 8> NewMergeRegs;
1398   SmallVector<Register, 8> Unmerges;
1399   LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
1400 
1401   // Decompose the original operands if they don't evenly divide.
1402   for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
1403     Register SrcReg = MI.getOperand(I).getReg();
1404     if (GCD == SrcSize) {
1405       Unmerges.push_back(SrcReg);
1406     } else {
1407       auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
1408       for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1409         Unmerges.push_back(Unmerge.getReg(J));
1410     }
1411   }
1412 
1413   // Pad with undef to the next size that is a multiple of the requested size.
1414   if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
1415     Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
1416     for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
1417       Unmerges.push_back(UndefReg);
1418   }
1419 
1420   const int PartsPerGCD = WideSize / GCD;
1421 
1422   // Build merges of each piece.
1423   ArrayRef<Register> Slicer(Unmerges);
1424   for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1425     auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
1426     NewMergeRegs.push_back(Merge.getReg(0));
1427   }
1428 
1429   // A truncate may be necessary if the requested type doesn't evenly divide the
1430   // original result type.
1431   if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1432     MIRBuilder.buildMerge(DstReg, NewMergeRegs);
1433   } else {
1434     auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
1435     MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
1436   }
1437 
1438   MI.eraseFromParent();
1439   return Legalized;
1440 }
1441 
1442 Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) {
1443   Register WideReg = MRI.createGenericVirtualRegister(WideTy);
1444   LLT OrigTy = MRI.getType(OrigReg);
1445   LLT LCMTy = getLCMType(WideTy, OrigTy);
1446 
1447   const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits();
1448   const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits();
1449 
1450   Register UnmergeSrc = WideReg;
1451 
1452   // Create a merge to the LCM type, padding with undef
1453   // %0:_(<3 x s32>) = G_FOO => <4 x s32>
1454   // =>
1455   // %1:_(<4 x s32>) = G_FOO
1456   // %2:_(<4 x s32>) = G_IMPLICIT_DEF
1457   // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2
1458   // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3
1459   if (NumMergeParts > 1) {
1460     Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0);
1461     SmallVector<Register, 8> MergeParts(NumMergeParts, Undef);
1462     MergeParts[0] = WideReg;
1463     UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0);
1464   }
1465 
1466   // Unmerge to the original register and pad with dead defs.
1467   SmallVector<Register, 8> UnmergeResults(NumUnmergeParts);
1468   UnmergeResults[0] = OrigReg;
1469   for (int I = 1; I != NumUnmergeParts; ++I)
1470     UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy);
1471 
1472   MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc);
1473   return WideReg;
1474 }
1475 
/// Legalize G_UNMERGE_VALUES by widening the result type (type index 0) to
/// \p WideTy. Only scalar results and scalar/pointer sources are handled.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
                                          LLT WideTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  // Operands [0, NumDst) are the results; the last operand is the source.
  int NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy.isVector())
    return UnableToLegalize;

  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
  if (!DstTy.isScalar())
    return UnableToLegalize;

  // Case 1: the wide type covers the whole source. Extract each result with
  // shift + truncate instead of an unmerge.
  if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
    if (SrcTy.isPointer()) {
      const DataLayout &DL = MIRBuilder.getDataLayout();
      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
        LLVM_DEBUG(
            dbgs() << "Not casting non-integral address space integer\n");
        return UnableToLegalize;
      }

      SrcTy = LLT::scalar(SrcTy.getSizeInBits());
      SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
    }

    // Widen SrcTy to WideTy. This does not affect the result, but since the
    // user requested this size, it is probably better handled than SrcTy and
    // should reduce the total number of legalization artifacts
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      SrcTy = WideTy;
      SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
    }

    // There's no unmerge type to target. Directly extract the bits from the
    // source type
    unsigned DstSize = DstTy.getSizeInBits();

    // Result I occupies bits [I * DstSize, (I + 1) * DstSize) of the source.
    MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
    for (int I = 1; I != NumDst; ++I) {
      auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
      auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
      MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  // Case 2: the wide type is smaller than the source.
  // Extend the source to a wider type.
  LLT LCMTy = getLCMType(SrcTy, WideTy);

  Register WideSrc = SrcReg;
  if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
    // TODO: If this is an integral address space, cast to integer and anyext.
    if (SrcTy.isPointer()) {
      LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
      return UnableToLegalize;
    }

    WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
  }

  auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);

  // Create a sequence of unmerges and merges to the original results. Since we
  // may have widened the source, we will need to pad the results with dead defs
  // to cover the source register.
  // e.g. widen s48 to s64:
  // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
  //
  // =>
  //  %4:_(s192) = G_ANYEXT %0:_(s96)
  //  %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
  //  ; unpack to GCD type, with extra dead defs
  //  %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
  //  %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
  //  dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
  //  %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10   ; Remerge to destination
  //  %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
  const LLT GCDTy = getGCDType(WideTy, DstTy);
  const int NumUnmerge = Unmerge->getNumOperands() - 1;
  const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();

  // Directly unmerge to the destination without going through a GCD type
  // if possible
  if (PartsPerRemerge == 1) {
    const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();

    for (int I = 0; I != NumUnmerge; ++I) {
      auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

      for (int J = 0; J != PartsPerUnmerge; ++J) {
        int Idx = I * PartsPerUnmerge + J;
        if (Idx < NumDst)
          MIB.addDef(MI.getOperand(Idx).getReg());
        else {
          // Create dead def for excess components.
          MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
        }
      }

      MIB.addUse(Unmerge.getReg(I));
    }
  } else {
    // General path: break every wide piece down to the GCD type, then remerge
    // runs of PartsPerRemerge GCD pieces into each original destination.
    SmallVector<Register, 16> Parts;
    for (int J = 0; J != NumUnmerge; ++J)
      extractGCDType(Parts, GCDTy, Unmerge.getReg(J));

    SmallVector<Register, 8> RemergeParts;
    for (int I = 0; I != NumDst; ++I) {
      for (int J = 0; J < PartsPerRemerge; ++J) {
        const int Idx = I * PartsPerRemerge + J;
        RemergeParts.emplace_back(Parts[Idx]);
      }

      MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts);
      RemergeParts.clear();
    }
  }

  MI.eraseFromParent();
  return Legalized;
}
1604 
/// Widen G_EXTRACT by performing the extract in a wider type.
///
/// For TypeIdx == 0 (the result), the extract is lowered to a right shift of
/// the (possibly any-extended) source by the bit offset, followed by a
/// truncate to the destination register. For TypeIdx == 1 (the source), a
/// scalar source is simply any-extended; a vector source is only handled when
/// the extract reads exactly one whole, element-aligned vector element.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                    LLT WideTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  LLT DstTy = MRI.getType(DstReg);
  unsigned Offset = MI.getOperand(2).getImm();

  if (TypeIdx == 0) {
    if (SrcTy.isVector() || DstTy.isVector())
      return UnableToLegalize;

    SrcOp Src(SrcReg);
    if (SrcTy.isPointer()) {
      // Extracts from pointers can be handled only if they are really just
      // simple integers.
      const DataLayout &DL = MIRBuilder.getDataLayout();
      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
        return UnableToLegalize;

      // Reinterpret the pointer as a same-sized integer so it can be shifted
      // below.
      LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
      Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
      SrcTy = SrcAsIntTy;
    }

    if (DstTy.isPointer())
      return UnableToLegalize;

    if (Offset == 0) {
      // Avoid a shift in the degenerate case.
      MIRBuilder.buildTrunc(DstReg,
                            MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
      MI.eraseFromParent();
      return Legalized;
    }

    // Do a shift in the source type.
    LLT ShiftTy = SrcTy;
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      Src = MIRBuilder.buildAnyExt(WideTy, Src);
      ShiftTy = WideTy;
    }

    // Move the extracted bits down to bit 0 and truncate to the result type.
    auto LShr = MIRBuilder.buildLShr(
      ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
    MIRBuilder.buildTrunc(DstReg, LShr);
    MI.eraseFromParent();
    return Legalized;
  }

  if (SrcTy.isScalar()) {
    // The high bits of the source don't affect the extracted range, so an
    // any-extend is sufficient.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }

  if (!SrcTy.isVector())
    return UnableToLegalize;

  // Only handle extracting exactly one whole vector element.
  if (DstTy != SrcTy.getElementType())
    return UnableToLegalize;

  // The extract must be element-aligned, otherwise the offset rescaling
  // below would not land on an element boundary of the wide vector.
  if (Offset % SrcTy.getScalarSizeInBits() != 0)
    return UnableToLegalize;

  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

  // Rescale the bit offset to account for the wider element size.
  MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
                          Offset);
  widenScalarDst(MI, WideTy.getScalarType(), 0);
  Observer.changedInstr(MI);
  return Legalized;
}
1682 
1683 LegalizerHelper::LegalizeResult
1684 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
1685                                    LLT WideTy) {
1686   if (TypeIdx != 0 || WideTy.isVector())
1687     return UnableToLegalize;
1688   Observer.changingInstr(MI);
1689   widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1690   widenScalarDst(MI, WideTy);
1691   Observer.changedInstr(MI);
1692   return Legalized;
1693 }
1694 
/// Widen the overflow-producing add/sub opcodes (G_[SU]ADDO, G_[SU]SUBO,
/// G_[SU]ADDE, G_[SU]SUBE).
///
/// The operands are sign- or zero-extended (matching the signedness of the
/// opcode) to \p WideTy, the arithmetic is done at the wide width, and the
/// overflow flag is recomputed by checking whether re-extending the truncated
/// result reproduces the wide result.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
                                           LLT WideTy) {
  if (TypeIdx == 1)
    return UnableToLegalize; // TODO

  // Map each overflow opcode to the plain (or unsigned carry-consuming)
  // arithmetic opcode used at the wide width, plus the extension that
  // preserves its signedness. Note the signed carry variants also map to the
  // unsigned carry opcodes: overflow is detected separately by the compare
  // below, so only the carry-in propagation is needed from the wide op.
  unsigned Opcode;
  unsigned ExtOpcode;
  Optional<Register> CarryIn = None;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case TargetOpcode::G_SADDO:
    Opcode = TargetOpcode::G_ADD;
    ExtOpcode = TargetOpcode::G_SEXT;
    break;
  case TargetOpcode::G_SSUBO:
    Opcode = TargetOpcode::G_SUB;
    ExtOpcode = TargetOpcode::G_SEXT;
    break;
  case TargetOpcode::G_UADDO:
    Opcode = TargetOpcode::G_ADD;
    ExtOpcode = TargetOpcode::G_ZEXT;
    break;
  case TargetOpcode::G_USUBO:
    Opcode = TargetOpcode::G_SUB;
    ExtOpcode = TargetOpcode::G_ZEXT;
    break;
  case TargetOpcode::G_SADDE:
    Opcode = TargetOpcode::G_UADDE;
    ExtOpcode = TargetOpcode::G_SEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_SSUBE:
    Opcode = TargetOpcode::G_USUBE;
    ExtOpcode = TargetOpcode::G_SEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_UADDE:
    Opcode = TargetOpcode::G_UADDE;
    ExtOpcode = TargetOpcode::G_ZEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_USUBE:
    Opcode = TargetOpcode::G_USUBE;
    ExtOpcode = TargetOpcode::G_ZEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  }

  auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
  auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
  // Do the arithmetic in the larger type.
  Register NewOp;
  if (CarryIn) {
    // Carry-consuming variants also produce a carry-out at the wide width;
    // that value is discarded since overflow is recomputed below.
    LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
    NewOp = MIRBuilder
                .buildInstr(Opcode, {WideTy, CarryOutTy},
                            {LHSExt, RHSExt, *CarryIn})
                .getReg(0);
  } else {
    NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
  }
  LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
  auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
  auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
  // There is no overflow if the ExtOp is the same as NewOp.
  MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
  // Now trunc the NewOp to the original result.
  MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
  MI.eraseFromParent();
  return Legalized;
}
1768 
/// Widen the saturating add/sub/shift opcodes (G_[SU]ADDSAT, G_[SU]SUBSAT,
/// G_[SU]SHLSAT) by performing the saturating operation in the top bits of
/// the wider type.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
                                         LLT WideTy) {
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
                  MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
                  MI.getOpcode() == TargetOpcode::G_SSHLSAT;
  bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
                 MI.getOpcode() == TargetOpcode::G_USHLSAT;
  // We can convert this to:
  //   1. Any extend iN to iM
  //   2. SHL by M-N
  //   3. [US][ADD|SUB|SHL]SAT
  //   4. L/ASHR by M-N
  //
  // It may be more efficient to lower this to a min and a max operation in
  // the higher precision arithmetic if the promoted operation isn't legal,
  // but this decision is up to the target's lowering request.
  Register DstReg = MI.getOperand(0).getReg();

  unsigned NewBits = WideTy.getScalarSizeInBits();
  unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();

  // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
  // must not left shift the RHS to preserve the shift amount.
  auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
  auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
                     : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
  auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
  auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
  auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);

  // Perform the saturating operation with both operands positioned in the
  // high bits, so saturation behaves identically to the narrow type.
  auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
                                        {ShiftL, ShiftR}, MI.getFlags());

  // Use a shift that will preserve the number of sign bits when the trunc is
  // folded away.
  auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
                         : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);

  MIRBuilder.buildTrunc(DstReg, Result);
  MI.eraseFromParent();
  return Legalized;
}
1812 
/// Legalize an instruction by widening the scalar type indexed by \p TypeIdx
/// to \p WideTy. Dispatches per-opcode: extensions are chosen to preserve
/// each operand's semantics (any-extend where high bits are don't-care,
/// sign/zero-extend where they matter), and results are truncated back.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_EXTRACT:
    return widenScalarExtract(MI, TypeIdx, WideTy);
  case TargetOpcode::G_INSERT:
    return widenScalarInsert(MI, TypeIdx, WideTy);
  case TargetOpcode::G_MERGE_VALUES:
    return widenScalarMergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UNMERGE_VALUES:
    return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
  case TargetOpcode::G_SADDSAT:
  case TargetOpcode::G_SSUBSAT:
  case TargetOpcode::G_SSHLSAT:
  case TargetOpcode::G_UADDSAT:
  case TargetOpcode::G_USUBSAT:
  case TargetOpcode::G_USHLSAT:
    return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP: {
    if (TypeIdx == 0) {
      Observer.changingInstr(MI);
      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    Register SrcReg = MI.getOperand(1).getReg();

    // First ZEXT the input.
    auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
    LLT CurTy = MRI.getType(SrcReg);
    if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
      // The count is the same in the larger type except if the original
      // value was zero.  This can be handled by setting the bit just off
      // the top of the original type.
      auto TopBit =
          APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
      MIBSrc = MIRBuilder.buildOr(
        WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
    }

    // Perform the operation at the larger size.
    auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
    // This is already the correct result for CTPOP and CTTZs
    if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
        MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
      // The correct result is NewOp - (Difference in widety and current ty).
      unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
      MIBNewOp = MIRBuilder.buildSub(
          WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
    }

    MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_BSWAP: {
    Observer.changingInstr(MI);
    Register DstReg = MI.getOperand(0).getReg();

    Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
    Register DstExt = MRI.createGenericVirtualRegister(WideTy);
    Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

    MI.getOperand(0).setReg(DstExt);

    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

    // The swapped bytes land in the high part of the wide result; shift them
    // back down before truncating.
    LLT Ty = MRI.getType(DstReg);
    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
    MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
    MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);

    MIRBuilder.buildTrunc(DstReg, ShrReg);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BITREVERSE: {
    Observer.changingInstr(MI);

    Register DstReg = MI.getOperand(0).getReg();
    LLT Ty = MRI.getType(DstReg);
    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();

    Register DstExt = MRI.createGenericVirtualRegister(WideTy);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    MI.getOperand(0).setReg(DstExt);
    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

    // The reversed bits land in the high part of the wide result; shift them
    // back down before truncating.
    auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
    auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
    MIRBuilder.buildTrunc(DstReg, Shift);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FREEZE:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ADD:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SUB:
    // Perform operation at larger width (any extension is fine here, high bits
    // don't affect the result) and then truncate the result back to the
    // original type.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SHL:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
    // Signed operations need the sign of the value preserved in the high
    // bits, so sign-extend the inputs.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      // The bits shifted in from the top must match the narrow semantics:
      // sign bits for an arithmetic shift, zeros for a logical shift.
      unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
        TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;

      widenScalarSrc(MI, WideTy, 1, CvtOp);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
    // Unsigned operations require zero-extended inputs.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SELECT:
    Observer.changingInstr(MI);
    if (TypeIdx == 0) {
      // Perform operation at larger width (any extension is fine here, high
      // bits don't affect the result) and then truncate the result back to the
      // original type.
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
      // Explicit extension is required here since high bits affect the result.
      widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    Observer.changingInstr(MI);

    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_SITOFP:
    Observer.changingInstr(MI);

    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    else
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UITOFP:
    Observer.changingInstr(MI);

    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    else
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_STORE: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    if (!Ty.isScalar())
      return UnableToLegalize;

    Observer.changingInstr(MI);

    // An s1 store is assumed to be truncated from a zero-extended value;
    // other widths can use any-extend since the store truncates anyway.
    unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
      TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
    widenScalarSrc(MI, WideTy, 0, ExtType);

    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    // The target chooses how constants are extended when widening.
    unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
        MRI.getType(MI.getOperand(0).getReg()));
    assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
            ExtOpc == TargetOpcode::G_ANYEXT) &&
           "Illegal Extend");
    const APInt &SrcVal = SrcMO.getCImm()->getValue();
    const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
                           ? SrcVal.sext(WideTy.getSizeInBits())
                           : SrcVal.zext(WideTy.getSizeInBits());
    Observer.changingInstr(MI);
    SrcMO.setCImm(ConstantInt::get(Ctx, Val));

    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FCONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    APFloat Val = SrcMO.getFPImm()->getValueAPF();
    bool LosesInfo;
    switch (WideTy.getSizeInBits()) {
    case 32:
      Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
                  &LosesInfo);
      break;
    case 64:
      Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
                  &LosesInfo);
      break;
    default:
      return UnableToLegalize;
    }

    assert(!LosesInfo && "extend should always be lossless");

    Observer.changingInstr(MI);
    SrcMO.setFPImm(ConstantFP::get(Ctx, Val));

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_IMPLICIT_DEF: {
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BRCOND:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FCMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ICMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      // The extension must match the predicate's signedness so the compare
      // result is unchanged at the wide width.
      unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
                               MI.getOperand(1).getPredicate()))
                               ? TargetOpcode::G_SEXT
                               : TargetOpcode::G_ZEXT;
      widenScalarSrc(MI, WideTy, 2, ExtOpcode);
      widenScalarSrc(MI, WideTy, 3, ExtOpcode);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_PTR_ADD:
    assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_PHI: {
    assert(TypeIdx == 0 && "Expecting only Idx 0");

    Observer.changingInstr(MI);
    // Each incoming value must be extended in its own predecessor block.
    for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
    }

    // The truncated result must be placed after all PHIs in this block.
    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
    if (TypeIdx == 0) {
      Register VecReg = MI.getOperand(1).getReg();
      LLT VecTy = MRI.getType(VecReg);
      Observer.changingInstr(MI);

      widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
                                     WideTy.getSizeInBits()),
                     1, TargetOpcode::G_SEXT);

      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    if (TypeIdx != 2)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    // TODO: Probably should be zext
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    if (TypeIdx == 1) {
      Observer.changingInstr(MI);

      Register VecReg = MI.getOperand(1).getReg();
      LLT VecTy = MRI.getType(VecReg);
      LLT WideVecTy = LLT::vector(VecTy.getNumElements(), WideTy);

      widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideVecTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    if (TypeIdx == 2) {
      Observer.changingInstr(MI);
      // TODO: Probably should be zext
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
      Observer.changedInstr(MI);
      return Legalized;
    }

    return UnableToLegalize;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FMAD:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FCANONICALIZE:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
    assert(TypeIdx == 0);
    Observer.changingInstr(MI);

    // FP operations: extend every source and truncate the result back.
    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_FPOWI: {
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_INTTOPTR:
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRTOINT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_BUILD_VECTOR: {
    Observer.changingInstr(MI);

    const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
    for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);

    // Avoid changing the result vector type if the source element type was
    // requested.
    if (TypeIdx == 1) {
      MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
    } else {
      widenScalarDst(MI, WideTy, 0);
    }

    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_SEXT_INREG:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRMASK: {
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  }
}
2335 
2336 static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
2337                              MachineIRBuilder &B, Register Src, LLT Ty) {
2338   auto Unmerge = B.buildUnmerge(Ty, Src);
2339   for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
2340     Pieces.push_back(Unmerge.getReg(I));
2341 }
2342 
/// Lower a G_BITCAST involving at least one vector type by unmerging the
/// source into pieces, bitcasting each piece where an element-size mismatch
/// requires it, and re-merging into the destination. Scalar-to-scalar
/// bitcasts are not handled here.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitcast(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  if (SrcTy.isVector()) {
    LLT SrcEltTy = SrcTy.getElementType();
    SmallVector<Register, 8> SrcRegs;

    if (DstTy.isVector()) {
      int NumDstElt = DstTy.getNumElements();
      int NumSrcElt = SrcTy.getNumElements();

      LLT DstEltTy = DstTy.getElementType();
      LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
      LLT SrcPartTy = SrcEltTy; // Original unmerge result type.

      // If there's an element size mismatch, insert intermediate casts to match
      // the result element type.
      if (NumSrcElt < NumDstElt) { // Source element type is larger.
        // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
        //
        // =>
        //
        // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
        // %4:_(<2 x s8>) = G_BITCAST %2
        // %5:_(<2 x s8>) = G_BITCAST %3
        // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
        DstCastTy = LLT::vector(NumDstElt / NumSrcElt, DstEltTy);
        SrcPartTy = SrcEltTy;
      } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
        //
        // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
        //
        // =>
        //
        // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
        // %4:_(s16) = G_BITCAST %2
        // %5:_(s16) = G_BITCAST %3
        // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
        SrcPartTy = LLT::vector(NumSrcElt / NumDstElt, SrcEltTy);
        DstCastTy = DstEltTy;
      }

      getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
      for (Register &SrcReg : SrcRegs)
        SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
    } else
      // Vector-to-scalar: unmerge to elements and merge into the scalar.
      getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);

    MIRBuilder.buildMerge(Dst, SrcRegs);
    MI.eraseFromParent();
    return Legalized;
  }

  if (DstTy.isVector()) {
    // Scalar-to-vector: unmerge the scalar into destination-sized elements.
    SmallVector<Register, 8> SrcRegs;
    getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
    MIRBuilder.buildMerge(Dst, SrcRegs);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
2410 
/// Figure out the bit offset into a register when coercing a vector index for
/// the wide element type. This is only for the case when promoting vector to
/// one with larger elements.
///
/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
                                                   Register Idx,
                                                   unsigned NewEltSize,
                                                   unsigned OldEltSize) {
  // Number of old elements packed per new element; assumed to be a power of
  // two so the modulo below can be a mask.
  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
  LLT IdxTy = B.getMRI()->getType(Idx);

  // Now figure out the amount we need to shift to get the target bits.
  // OffsetIdx = Idx % EltRatio, computed with an AND mask.
  auto OffsetMask = B.buildConstant(
    IdxTy, ~(APInt::getAllOnesValue(IdxTy.getSizeInBits()) << Log2EltRatio));
  auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
  // Convert the sub-element index to a bit offset: OffsetIdx * OldEltSize.
  return B.buildShl(IdxTy, OffsetIdx,
                    B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
}
2432 
/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
/// is casting to a vector with a smaller element size, perform multiple element
/// extracts and merge the results. If this is coercing to a vector with larger
/// elements, index the bitcasted vector and extract the target element with bit
/// operations. This is intended to force the indexing in the native register
/// size for architectures that can dynamically index the register file.
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
                                         LLT CastTy) {
  // Only the source vector operand (type index 1) can be bitcast here.
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register Dst = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register Idx = MI.getOperand(2).getReg();
  LLT SrcVecTy = MRI.getType(SrcVec);
  LLT IdxTy = MRI.getType(Idx);

  LLT SrcEltTy = SrcVecTy.getElementType();
  // A scalar CastTy is treated as a single wide "element".
  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
  unsigned OldNumElts = SrcVecTy.getNumElements();

  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);

  const unsigned NewEltSize = NewEltTy.getSizeInBits();
  const unsigned OldEltSize = SrcEltTy.getSizeInBits();
  if (NewNumElts > OldNumElts) {
    // Decreasing the vector element size
    //
    // e.g. i64 = extract_vector_elt x:v2i64, y:i32
    //  =>
    //  v4i32:castx = bitcast x:v2i64
    //
    // i64 = bitcast
    //   (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
    //                       (i32 (extract_vector_elt castx, (2 * y + 1)))
    //
    // Each old element must decompose into a whole number of new elements.
    if (NewNumElts % OldNumElts != 0)
      return UnableToLegalize;

    // Type of the intermediate result vector.
    const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
    LLT MidTy = LLT::scalarOrVector(NewEltsPerOldElt, NewEltTy);

    auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);

    SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
    // Index of the first small element covered by the requested old element.
    auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);

    // Extract each of the small elements spanning the original element.
    for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
      auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
      auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
      auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
      NewOps[I] = Elt.getReg(0);
    }

    // Reassemble the pieces and cast back to the original element type.
    auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
    MIRBuilder.buildBitcast(Dst, NewVec);
    MI.eraseFromParent();
    return Legalized;
  }

  if (NewNumElts < OldNumElts) {
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;

    // This only depends on powers of 2 because we use bit tricks to figure out
    // the bit offset we need to shift to get the target element. A general
    // expansion could emit division/multiply.
    if (!isPowerOf2_32(NewEltSize / OldEltSize))
      return UnableToLegalize;

    // Increasing the vector element size.
    // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
    //
    //   =>
    //
    // %cast = G_BITCAST %vec
    // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
    // %wide_elt  = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
    // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
    // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
    // %elt_bits = G_LSHR %wide_elt, %offset_bits
    // %elt = G_TRUNC %elt_bits

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);

    // Divide to get the index in the wider element type.
    auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);

    // If CastTy is scalar there is only one wide "element" -- the cast value
    // itself -- so no extract is needed.
    Register WideElt = CastVec;
    if (CastTy.isVector()) {
      WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
                                                     ScaledIdx).getReg(0);
    }

    // Compute the bit offset into the register of the target element.
    Register OffsetBits = getBitcastWiderVectorElementOffset(
      MIRBuilder, Idx, NewEltSize, OldEltSize);

    // Shift the wide element to get the target element.
    auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
    MIRBuilder.buildTrunc(Dst, ExtractedBits);
    MI.eraseFromParent();
    return Legalized;
  }

  // Same element count: this cast doesn't change the indexing, so there is
  // nothing useful to do.
  return UnableToLegalize;
}
2544 
/// Emit code to insert \p InsertReg into \p TargetReg at bit offset \p
/// OffsetBits, while preserving the other bits in \p TargetReg.
///
/// (ZExt(InsertReg) << OffsetBits) |
///     (TargetReg & ~(((1 << InsertReg.size()) - 1) << OffsetBits))
static Register buildBitFieldInsert(MachineIRBuilder &B,
                                    Register TargetReg, Register InsertReg,
                                    Register OffsetBits) {
  LLT TargetTy = B.getMRI()->getType(TargetReg);
  LLT InsertTy = B.getMRI()->getType(InsertReg);
  // Widen with zeros so the OR below can't disturb neighboring bits.
  auto ZextVal = B.buildZExt(TargetTy, InsertReg);
  auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);

  // Produce a bitmask of the value to insert
  auto EltMask = B.buildConstant(
    TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
                                   InsertTy.getSizeInBits()));
  // Shift it into position
  auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
  auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);

  // Clear out the bits in the wide element
  auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);

  // The value to insert has all zeros already, so stick it into the masked
  // wide element.
  return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
}
2572 
/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
/// is increasing the element size, perform the indexing in the target element
/// type, and use bit operations to insert at the element position. This is
/// intended for architectures that can dynamically index the register file and
/// want to force indexing in the native register size.
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
                                        LLT CastTy) {
  // Only the vector type (type index 0) can be bitcast here.
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register Dst = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register Val = MI.getOperand(2).getReg();
  Register Idx = MI.getOperand(3).getReg();

  LLT VecTy = MRI.getType(Dst);
  LLT IdxTy = MRI.getType(Idx);

  LLT VecEltTy = VecTy.getElementType();
  // A scalar CastTy acts as a single wide "element".
  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
  const unsigned NewEltSize = NewEltTy.getSizeInBits();
  const unsigned OldEltSize = VecEltTy.getSizeInBits();

  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
  unsigned OldNumElts = VecTy.getNumElements();

  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
  if (NewNumElts < OldNumElts) {
    // Increasing the vector element size: pull out the wide element covering
    // the target position, splice the value in with bit operations, and put
    // the wide element back.
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;

    // This only depends on powers of 2 because we use bit tricks to figure out
    // the bit offset we need to shift to get the target element. A general
    // expansion could emit division/multiply.
    if (!isPowerOf2_32(NewEltSize / OldEltSize))
      return UnableToLegalize;

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);

    // Divide to get the index in the wider element type.
    auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);

    // If CastTy is scalar, the cast value itself is the single wide element.
    Register ExtractedElt = CastVec;
    if (CastTy.isVector()) {
      ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
                                                          ScaledIdx).getReg(0);
    }

    // Compute the bit offset into the register of the target element.
    Register OffsetBits = getBitcastWiderVectorElementOffset(
      MIRBuilder, Idx, NewEltSize, OldEltSize);

    // Insert Val into the wide element, preserving its other bits.
    Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
                                               Val, OffsetBits);
    if (CastTy.isVector()) {
      // Write the updated wide element back into the cast vector.
      InsertedElt = MIRBuilder.buildInsertVectorElement(
        CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
    }

    MIRBuilder.buildBitcast(Dst, InsertedElt);
    MI.eraseFromParent();
    return Legalized;
  }

  // Casting to smaller (or same-sized) elements is not implemented here.
  return UnableToLegalize;
}
2641 
/// Lower an extending load (result wider than the memory access), or a plain
/// G_LOAD of a non-power-of-2 scalar size, into simpler legal pieces.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerLoad(MachineInstr &MI) {
  // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
  Register DstReg = MI.getOperand(0).getReg();
  Register PtrReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  auto &MMO = **MI.memoperands_begin();

  if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
    if (MI.getOpcode() == TargetOpcode::G_LOAD) {
      // This load needs splitting into power of 2 sized loads.
      if (DstTy.isVector())
        return UnableToLegalize;
      if (isPowerOf2_32(DstTy.getSizeInBits()))
        return UnableToLegalize; // Don't know what we're being asked to do.

      // Our strategy here is to generate anyextending loads for the smaller
      // types up to next power-2 result type, and then combine the two larger
      // result values together, before truncating back down to the non-pow-2
      // type.
      // E.g. v1 = i24 load =>
      // v2 = i32 zextload (2 byte)
      // v3 = i32 load (1 byte)
      // v4 = i32 shl v3, 16
      // v5 = i32 or v4, v2
      // v1 = i24 trunc v5
      // By doing this we generate the correct truncate which should get
      // combined away as an artifact with a matching extend.
      uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
      uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;

      // Split the memory operand into the byte ranges covered by the two
      // loads.
      MachineFunction &MF = MIRBuilder.getMF();
      MachineMemOperand *LargeMMO =
        MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
      MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
        &MMO, LargeSplitSize / 8, SmallSplitSize / 8);

      LLT PtrTy = MRI.getType(PtrReg);
      unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
      LLT AnyExtTy = LLT::scalar(AnyExtSize);
      Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
      Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
      // The low part must be zero-extended so the OR below can't clobber its
      // high bits.
      auto LargeLoad = MIRBuilder.buildLoadInstr(
        TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO);

      auto OffsetCst = MIRBuilder.buildConstant(
        LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
      Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
      auto SmallPtr =
        MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
      auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
                                            *SmallMMO);

      // Shift the high part into position and combine with the low part.
      auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
      auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
      auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
      MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
      MI.eraseFromParent();
      return Legalized;
    }

    // A G_SEXTLOAD/G_ZEXTLOAD whose result width matches the memory width is
    // just a plain load.
    MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
    MI.eraseFromParent();
    return Legalized;
  }

  if (DstTy.isScalar()) {
    // Load at the memory width, then extend up to the result width.
    Register TmpReg =
      MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
    MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode");
    case TargetOpcode::G_LOAD:
      MIRBuilder.buildAnyExtOrTrunc(DstReg, TmpReg);
      break;
    case TargetOpcode::G_SEXTLOAD:
      MIRBuilder.buildSExt(DstReg, TmpReg);
      break;
    case TargetOpcode::G_ZEXTLOAD:
      MIRBuilder.buildZExt(DstReg, TmpReg);
      break;
    }

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
2732 
/// Lower a G_STORE of a non-power-of-2 sized scalar into two power-of-2
/// stores.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerStore(MachineInstr &MI) {
  // Lower a non-power of 2 store into multiple pow-2 stores.
  // E.g. split an i24 store into an i16 store + i8 store.
  // We do this by first extending the stored value to the next largest power
  // of 2 type, and then using truncating stores to store the components.
  // By doing this, likewise with G_LOAD, generate an extend that can be
  // artifact-combined away instead of leaving behind extracts.
  Register SrcReg = MI.getOperand(0).getReg();
  Register PtrReg = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  MachineMemOperand &MMO = **MI.memoperands_begin();
  // Truncating stores, vectors, and already power-of-2 sizes are handled
  // elsewhere.
  if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
    return UnableToLegalize;
  if (SrcTy.isVector())
    return UnableToLegalize;
  if (isPowerOf2_32(SrcTy.getSizeInBits()))
    return UnableToLegalize; // Don't know what we're being asked to do.

  // Extend to the next pow-2.
  const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
  auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);

  // Obtain the smaller value by shifting away the larger value.
  uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
  uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
  auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
  auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);

  // Generate the PtrAdd and truncating stores.
  LLT PtrTy = MRI.getType(PtrReg);
  auto OffsetCst = MIRBuilder.buildConstant(
    LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
  Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
  auto SmallPtr =
    MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));

  // Split the memory operand to match the two byte ranges being stored.
  MachineFunction &MF = MIRBuilder.getMF();
  MachineMemOperand *LargeMMO =
    MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
  MachineMemOperand *SmallMMO =
    MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
  MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
  MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
  MI.eraseFromParent();
  return Legalized;
}
2780 
2781 LegalizerHelper::LegalizeResult
2782 LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
2783   switch (MI.getOpcode()) {
2784   case TargetOpcode::G_LOAD: {
2785     if (TypeIdx != 0)
2786       return UnableToLegalize;
2787 
2788     Observer.changingInstr(MI);
2789     bitcastDst(MI, CastTy, 0);
2790     Observer.changedInstr(MI);
2791     return Legalized;
2792   }
2793   case TargetOpcode::G_STORE: {
2794     if (TypeIdx != 0)
2795       return UnableToLegalize;
2796 
2797     Observer.changingInstr(MI);
2798     bitcastSrc(MI, CastTy, 0);
2799     Observer.changedInstr(MI);
2800     return Legalized;
2801   }
2802   case TargetOpcode::G_SELECT: {
2803     if (TypeIdx != 0)
2804       return UnableToLegalize;
2805 
2806     if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
2807       LLVM_DEBUG(
2808           dbgs() << "bitcast action not implemented for vector select\n");
2809       return UnableToLegalize;
2810     }
2811 
2812     Observer.changingInstr(MI);
2813     bitcastSrc(MI, CastTy, 2);
2814     bitcastSrc(MI, CastTy, 3);
2815     bitcastDst(MI, CastTy, 0);
2816     Observer.changedInstr(MI);
2817     return Legalized;
2818   }
2819   case TargetOpcode::G_AND:
2820   case TargetOpcode::G_OR:
2821   case TargetOpcode::G_XOR: {
2822     Observer.changingInstr(MI);
2823     bitcastSrc(MI, CastTy, 1);
2824     bitcastSrc(MI, CastTy, 2);
2825     bitcastDst(MI, CastTy, 0);
2826     Observer.changedInstr(MI);
2827     return Legalized;
2828   }
2829   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
2830     return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
2831   case TargetOpcode::G_INSERT_VECTOR_ELT:
2832     return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
2833   default:
2834     return UnableToLegalize;
2835   }
2836 }
2837 
2838 // Legalize an instruction by changing the opcode in place.
2839 void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
2840     Observer.changingInstr(MI);
2841     MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
2842     Observer.changedInstr(MI);
2843 }
2844 
2845 LegalizerHelper::LegalizeResult
2846 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
2847   using namespace TargetOpcode;
2848 
2849   switch(MI.getOpcode()) {
2850   default:
2851     return UnableToLegalize;
2852   case TargetOpcode::G_BITCAST:
2853     return lowerBitcast(MI);
2854   case TargetOpcode::G_SREM:
2855   case TargetOpcode::G_UREM: {
2856     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2857     auto Quot =
2858         MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
2859                               {MI.getOperand(1), MI.getOperand(2)});
2860 
2861     auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
2862     MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
2863     MI.eraseFromParent();
2864     return Legalized;
2865   }
2866   case TargetOpcode::G_SADDO:
2867   case TargetOpcode::G_SSUBO:
2868     return lowerSADDO_SSUBO(MI);
2869   case TargetOpcode::G_UMULH:
2870   case TargetOpcode::G_SMULH:
2871     return lowerSMULH_UMULH(MI);
2872   case TargetOpcode::G_SMULO:
2873   case TargetOpcode::G_UMULO: {
2874     // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
2875     // result.
2876     Register Res = MI.getOperand(0).getReg();
2877     Register Overflow = MI.getOperand(1).getReg();
2878     Register LHS = MI.getOperand(2).getReg();
2879     Register RHS = MI.getOperand(3).getReg();
2880     LLT Ty = MRI.getType(Res);
2881 
2882     unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
2883                           ? TargetOpcode::G_SMULH
2884                           : TargetOpcode::G_UMULH;
2885 
2886     Observer.changingInstr(MI);
2887     const auto &TII = MIRBuilder.getTII();
2888     MI.setDesc(TII.get(TargetOpcode::G_MUL));
2889     MI.RemoveOperand(1);
2890     Observer.changedInstr(MI);
2891 
2892     auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
2893     auto Zero = MIRBuilder.buildConstant(Ty, 0);
2894 
2895     // Move insert point forward so we can use the Res register if needed.
2896     MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2897 
2898     // For *signed* multiply, overflow is detected by checking:
2899     // (hi != (lo >> bitwidth-1))
2900     if (Opcode == TargetOpcode::G_SMULH) {
2901       auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
2902       auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
2903       MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
2904     } else {
2905       MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
2906     }
2907     return Legalized;
2908   }
2909   case TargetOpcode::G_FNEG: {
2910     Register Res = MI.getOperand(0).getReg();
2911     LLT Ty = MRI.getType(Res);
2912 
2913     // TODO: Handle vector types once we are able to
2914     // represent them.
2915     if (Ty.isVector())
2916       return UnableToLegalize;
2917     auto SignMask =
2918         MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
2919     Register SubByReg = MI.getOperand(1).getReg();
2920     MIRBuilder.buildXor(Res, SubByReg, SignMask);
2921     MI.eraseFromParent();
2922     return Legalized;
2923   }
2924   case TargetOpcode::G_FSUB: {
2925     Register Res = MI.getOperand(0).getReg();
2926     LLT Ty = MRI.getType(Res);
2927 
2928     // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
2929     // First, check if G_FNEG is marked as Lower. If so, we may
2930     // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
2931     if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
2932       return UnableToLegalize;
2933     Register LHS = MI.getOperand(1).getReg();
2934     Register RHS = MI.getOperand(2).getReg();
2935     Register Neg = MRI.createGenericVirtualRegister(Ty);
2936     MIRBuilder.buildFNeg(Neg, RHS);
2937     MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
2938     MI.eraseFromParent();
2939     return Legalized;
2940   }
2941   case TargetOpcode::G_FMAD:
2942     return lowerFMad(MI);
2943   case TargetOpcode::G_FFLOOR:
2944     return lowerFFloor(MI);
2945   case TargetOpcode::G_INTRINSIC_ROUND:
2946     return lowerIntrinsicRound(MI);
2947   case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
2948     // Since round even is the assumed rounding mode for unconstrained FP
2949     // operations, rint and roundeven are the same operation.
2950     changeOpcode(MI, TargetOpcode::G_FRINT);
2951     return Legalized;
2952   }
2953   case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
2954     Register OldValRes = MI.getOperand(0).getReg();
2955     Register SuccessRes = MI.getOperand(1).getReg();
2956     Register Addr = MI.getOperand(2).getReg();
2957     Register CmpVal = MI.getOperand(3).getReg();
2958     Register NewVal = MI.getOperand(4).getReg();
2959     MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
2960                                   **MI.memoperands_begin());
2961     MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
2962     MI.eraseFromParent();
2963     return Legalized;
2964   }
2965   case TargetOpcode::G_LOAD:
2966   case TargetOpcode::G_SEXTLOAD:
2967   case TargetOpcode::G_ZEXTLOAD:
2968     return lowerLoad(MI);
2969   case TargetOpcode::G_STORE:
2970     return lowerStore(MI);
2971   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2972   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2973   case TargetOpcode::G_CTLZ:
2974   case TargetOpcode::G_CTTZ:
2975   case TargetOpcode::G_CTPOP:
2976     return lowerBitCount(MI);
2977   case G_UADDO: {
2978     Register Res = MI.getOperand(0).getReg();
2979     Register CarryOut = MI.getOperand(1).getReg();
2980     Register LHS = MI.getOperand(2).getReg();
2981     Register RHS = MI.getOperand(3).getReg();
2982 
2983     MIRBuilder.buildAdd(Res, LHS, RHS);
2984     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
2985 
2986     MI.eraseFromParent();
2987     return Legalized;
2988   }
2989   case G_UADDE: {
2990     Register Res = MI.getOperand(0).getReg();
2991     Register CarryOut = MI.getOperand(1).getReg();
2992     Register LHS = MI.getOperand(2).getReg();
2993     Register RHS = MI.getOperand(3).getReg();
2994     Register CarryIn = MI.getOperand(4).getReg();
2995     LLT Ty = MRI.getType(Res);
2996 
2997     auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
2998     auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
2999     MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
3000     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
3001 
3002     MI.eraseFromParent();
3003     return Legalized;
3004   }
3005   case G_USUBO: {
3006     Register Res = MI.getOperand(0).getReg();
3007     Register BorrowOut = MI.getOperand(1).getReg();
3008     Register LHS = MI.getOperand(2).getReg();
3009     Register RHS = MI.getOperand(3).getReg();
3010 
3011     MIRBuilder.buildSub(Res, LHS, RHS);
3012     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
3013 
3014     MI.eraseFromParent();
3015     return Legalized;
3016   }
3017   case G_USUBE: {
3018     Register Res = MI.getOperand(0).getReg();
3019     Register BorrowOut = MI.getOperand(1).getReg();
3020     Register LHS = MI.getOperand(2).getReg();
3021     Register RHS = MI.getOperand(3).getReg();
3022     Register BorrowIn = MI.getOperand(4).getReg();
3023     const LLT CondTy = MRI.getType(BorrowOut);
3024     const LLT Ty = MRI.getType(Res);
3025 
3026     auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
3027     auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
3028     MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
3029 
3030     auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS);
3031     auto LHS_ULT_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS);
3032     MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
3033 
3034     MI.eraseFromParent();
3035     return Legalized;
3036   }
3037   case G_UITOFP:
3038     return lowerUITOFP(MI);
3039   case G_SITOFP:
3040     return lowerSITOFP(MI);
3041   case G_FPTOUI:
3042     return lowerFPTOUI(MI);
3043   case G_FPTOSI:
3044     return lowerFPTOSI(MI);
3045   case G_FPTRUNC:
3046     return lowerFPTRUNC(MI);
3047   case G_FPOWI:
3048     return lowerFPOWI(MI);
3049   case G_SMIN:
3050   case G_SMAX:
3051   case G_UMIN:
3052   case G_UMAX:
3053     return lowerMinMax(MI);
3054   case G_FCOPYSIGN:
3055     return lowerFCopySign(MI);
3056   case G_FMINNUM:
3057   case G_FMAXNUM:
3058     return lowerFMinNumMaxNum(MI);
3059   case G_MERGE_VALUES:
3060     return lowerMergeValues(MI);
3061   case G_UNMERGE_VALUES:
3062     return lowerUnmergeValues(MI);
3063   case TargetOpcode::G_SEXT_INREG: {
3064     assert(MI.getOperand(2).isImm() && "Expected immediate");
3065     int64_t SizeInBits = MI.getOperand(2).getImm();
3066 
3067     Register DstReg = MI.getOperand(0).getReg();
3068     Register SrcReg = MI.getOperand(1).getReg();
3069     LLT DstTy = MRI.getType(DstReg);
3070     Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
3071 
3072     auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
3073     MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
3074     MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
3075     MI.eraseFromParent();
3076     return Legalized;
3077   }
3078   case G_EXTRACT_VECTOR_ELT:
3079   case G_INSERT_VECTOR_ELT:
3080     return lowerExtractInsertVectorElt(MI);
3081   case G_SHUFFLE_VECTOR:
3082     return lowerShuffleVector(MI);
3083   case G_DYN_STACKALLOC:
3084     return lowerDynStackAlloc(MI);
3085   case G_EXTRACT:
3086     return lowerExtract(MI);
3087   case G_INSERT:
3088     return lowerInsert(MI);
3089   case G_BSWAP:
3090     return lowerBswap(MI);
3091   case G_BITREVERSE:
3092     return lowerBitreverse(MI);
3093   case G_READ_REGISTER:
3094   case G_WRITE_REGISTER:
3095     return lowerReadWriteRegister(MI);
3096   case G_UADDSAT:
3097   case G_USUBSAT: {
3098     // Try to make a reasonable guess about which lowering strategy to use. The
3099     // target can override this with custom lowering and calling the
3100     // implementation functions.
3101     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3102     if (LI.isLegalOrCustom({G_UMIN, Ty}))
3103       return lowerAddSubSatToMinMax(MI);
3104     return lowerAddSubSatToAddoSubo(MI);
3105   }
3106   case G_SADDSAT:
3107   case G_SSUBSAT: {
3108     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3109 
3110     // FIXME: It would probably make more sense to see if G_SADDO is preferred,
3111     // since it's a shorter expansion. However, we would need to figure out the
3112     // preferred boolean type for the carry out for the query.
3113     if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
3114       return lowerAddSubSatToMinMax(MI);
3115     return lowerAddSubSatToAddoSubo(MI);
3116   }
3117   case G_SSHLSAT:
3118   case G_USHLSAT:
3119     return lowerShlSat(MI);
3120   case G_ABS: {
3121     // Expand %res = G_ABS %a into:
3122     // %v1 = G_ASHR %a, scalar_size-1
3123     // %v2 = G_ADD %a, %v1
3124     // %res = G_XOR %v2, %v1
3125     LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3126     Register OpReg = MI.getOperand(1).getReg();
3127     auto ShiftAmt =
3128         MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
3129     auto Shift =
3130         MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
3131     auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
3132     MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
3133     MI.eraseFromParent();
3134     return Legalized;
3135   }
3136   case G_SELECT:
3137     return lowerSelect(MI);
3138   }
3139 }
3140 
3141 Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
3142                                                   Align MinAlign) const {
3143   // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
3144   // datalayout for the preferred alignment. Also there should be a target hook
3145   // for this to allow targets to reduce the alignment and ignore the
3146   // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
3147   // the type.
3148   return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
3149 }
3150 
/// Create a fixed stack object of \p Bytes bytes with at least \p Alignment
/// alignment, returning a G_FRAME_INDEX addressing it. \p PtrInfo is set to
/// describe the new fixed-stack slot.
MachineInstrBuilder
LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
                                      MachinePointerInfo &PtrInfo) {
  MachineFunction &MF = MIRBuilder.getMF();
  const DataLayout &DL = MIRBuilder.getDataLayout();
  // Not a spill slot (third argument is isSpillSlot = false).
  int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);

  // The frame index pointer lives in the datalayout's alloca address space.
  unsigned AddrSpace = DL.getAllocaAddrSpace();
  LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));

  PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
  return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
}
3164 
3165 static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg,
3166                                         LLT VecTy) {
3167   int64_t IdxVal;
3168   if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal)))
3169     return IdxReg;
3170 
3171   LLT IdxTy = B.getMRI()->getType(IdxReg);
3172   unsigned NElts = VecTy.getNumElements();
3173   if (isPowerOf2_32(NElts)) {
3174     APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
3175     return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
3176   }
3177 
3178   return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
3179       .getReg(0);
3180 }
3181 
3182 Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
3183                                                   Register Index) {
3184   LLT EltTy = VecTy.getElementType();
3185 
3186   // Calculate the element offset and add it to the pointer.
3187   unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
3188   assert(EltSize * 8 == EltTy.getSizeInBits() &&
3189          "Converting bits to bytes lost precision");
3190 
3191   Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy);
3192 
3193   LLT IdxTy = MRI.getType(Index);
3194   auto Mul = MIRBuilder.buildMul(IdxTy, Index,
3195                                  MIRBuilder.buildConstant(IdxTy, EltSize));
3196 
3197   LLT PtrTy = MRI.getType(VecPtr);
3198   return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
3199 }
3200 
3201 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
3202     MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
3203   Register DstReg = MI.getOperand(0).getReg();
3204   LLT DstTy = MRI.getType(DstReg);
3205   LLT LCMTy = getLCMType(DstTy, NarrowTy);
3206 
3207   unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
3208 
3209   auto NewUndef = MIRBuilder.buildUndef(NarrowTy);
3210   SmallVector<Register, 8> Parts(NumParts, NewUndef.getReg(0));
3211 
3212   buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
3213   MI.eraseFromParent();
3214   return Legalized;
3215 }
3216 
// Handle splitting vector operations which need to have the same number of
// elements in each type index, but each type index may have a different element
// type.
//
// e.g.  <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
//       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//
// Also handles some irregular breakdown cases, e.g.
// e.g.  <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
//       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//             s64 = G_SHL s64, s32
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMultiEltType(
  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  const LLT NarrowTy0 = NarrowTyArg;
  // Element count each new instruction operates on (1 if NarrowTy0 is a
  // scalar).
  const unsigned NewNumElts =
      NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;

  const Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT LeftoverTy0;

  // All of the operands need to have the same number of elements, so if we can
  // determine a type breakdown for the result type, we can for all of the
  // source types.
  int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
  if (NumParts < 0)
    return UnableToLegalize;

  // New instructions are built unattached; they are only inserted once all of
  // their source operands have been added.
  SmallVector<MachineInstrBuilder, 4> NewInsts;

  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
  SmallVector<Register, 4> PartRegs, LeftoverRegs;

  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
    Register SrcReg = MI.getOperand(I).getReg();
    LLT SrcTyI = MRI.getType(SrcReg);
    // Same element count as the result pieces, but with this operand's own
    // scalar type.
    LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
    LLT LeftoverTyI;

    // Split this operand into the requested typed registers, and any leftover
    // required to reproduce the original type.
    if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
                      LeftoverRegs))
      return UnableToLegalize;

    if (I == 1) {
      // For the first operand, create an instruction for each part and setup
      // the result.
      for (Register PartReg : PartRegs) {
        Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
                               .addDef(PartDstReg)
                               .addUse(PartReg));
        DstRegs.push_back(PartDstReg);
      }

      for (Register LeftoverReg : LeftoverRegs) {
        Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
                               .addDef(PartDstReg)
                               .addUse(LeftoverReg));
        LeftoverDstRegs.push_back(PartDstReg);
      }
    } else {
      assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());

      // Add the newly created operand splits to the existing instructions. The
      // odd-sized pieces are ordered after the requested NarrowTyArg sized
      // pieces.
      unsigned InstCount = 0;
      for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
        NewInsts[InstCount++].addUse(PartRegs[J]);
      for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
        NewInsts[InstCount++].addUse(LeftoverRegs[J]);
    }

    // Reuse the scratch vectors for the next operand.
    PartRegs.clear();
    LeftoverRegs.clear();
  }

  // Insert the newly built operations and rebuild the result register.
  for (auto &MIB : NewInsts)
    MIRBuilder.insertInstr(MIB);

  insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);

  MI.eraseFromParent();
  return Legalized;
}
3311 
/// Split a vector cast/conversion (result type index only) into NumParts
/// operations on NarrowTy pieces, then reassemble the result with a concat
/// (vector pieces) or build_vector (scalar pieces).
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);

  LLT NarrowTy0 = NarrowTy;  // Result piece type.
  LLT NarrowTy1;             // Source piece type, derived below.
  unsigned NumParts;

  if (NarrowTy.isVector()) {
    // Uneven breakdown not handled.
    NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
    if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
      return UnableToLegalize;

    // Source pieces match the narrow element count but keep the source's
    // element type.
    NarrowTy1 = LLT::vector(NarrowTy.getNumElements(), SrcTy.getElementType());
  } else {
    // Full scalarization: one operation per element.
    NumParts = DstTy.getNumElements();
    NarrowTy1 = SrcTy.getElementType();
  }

  SmallVector<Register, 4> SrcRegs, DstRegs;
  extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);

  for (unsigned I = 0; I < NumParts; ++I) {
    // Note: this DstReg intentionally shadows the outer result register.
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    MachineInstr *NewInst =
        MIRBuilder.buildInstr(MI.getOpcode(), {DstReg}, {SrcRegs[I]});

    // Preserve FP/arithmetic flags on each piece.
    NewInst->setFlags(MI.getFlags());
    DstRegs.push_back(DstReg);
  }

  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
3359 
/// Split a vector G_ICMP/G_FCMP into NumParts smaller compares. TypeIdx 0
/// narrows via the result (boolean) vector, TypeIdx 1 via the compared
/// source vectors; the piece type for the other index is derived to keep the
/// element counts matched.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(Src0Reg);

  unsigned NumParts;
  LLT NarrowTy0, NarrowTy1; // Result piece type / source piece type.

  if (TypeIdx == 0) {
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = DstTy.getNumElements();

    // Requested type is the result pieces; derive matching source pieces with
    // the source's scalar width.
    NarrowTy0 = NarrowTy;
    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
    NarrowTy1 = NarrowTy.isVector() ?
      LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
      SrcTy.getElementType();

  } else {
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = SrcTy.getNumElements();

    // Requested type is the source pieces; derive matching result pieces with
    // the result's scalar width.
    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
      NarrowTy.getNumElements();
    NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
                            DstTy.getScalarSizeInBits());
    NarrowTy1 = NarrowTy;
  }

  // FIXME: Don't know how to handle the situation where the small vectors
  // aren't all the same size yet.
  if (NarrowTy1.isVector() &&
      NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
    return UnableToLegalize;

  CmpInst::Predicate Pred
    = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());

  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);

  // Emit one compare per piece pair.
  for (unsigned I = 0; I < NumParts; ++I) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    DstRegs.push_back(DstReg);

    if (MI.getOpcode() == TargetOpcode::G_ICMP)
      MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
    else {
      MachineInstr *NewCmp
        = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
      // Preserve FP flags (e.g. nnan) on the split compares.
      NewCmp->setFlags(MI.getFlags());
    }
  }

  // Reassemble: concat for vector pieces, build_vector for scalar pieces.
  if (NarrowTy1.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
3426 
/// Split a G_SELECT with vector operands into NumParts smaller selects.
/// TypeIdx 0 narrows the result/value vectors (also breaking down a vector
/// condition to match); TypeIdx 1 scalarizes a vector condition.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
                                           LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register CondReg = MI.getOperand(1).getReg();

  unsigned NumParts = 0;
  LLT NarrowTy0, NarrowTy1; // Value piece type / condition piece type.

  LLT DstTy = MRI.getType(DstReg);
  LLT CondTy = MRI.getType(CondReg);
  unsigned Size = DstTy.getSizeInBits();

  assert(TypeIdx == 0 || CondTy.isVector());

  if (TypeIdx == 0) {
    NarrowTy0 = NarrowTy;
    NarrowTy1 = CondTy;

    unsigned NarrowSize = NarrowTy0.getSizeInBits();
    // FIXME: Don't know how to handle the situation where the small vectors
    // aren't all the same size yet.
    if (Size % NarrowSize != 0)
      return UnableToLegalize;

    NumParts = Size / NarrowSize;

    // Need to break down the condition type
    if (CondTy.isVector()) {
      if (CondTy.getNumElements() == NumParts)
        NarrowTy1 = CondTy.getElementType();
      else
        NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
                                CondTy.getScalarSizeInBits());
    }
  } else {
    NumParts = CondTy.getNumElements();
    if (NarrowTy.isVector()) {
      // TODO: Handle uneven breakdown.
      if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
        return UnableToLegalize;

      // NOTE(review): breaking the condition into vector pieces is not
      // implemented yet, so this path always bails out; the divisibility
      // check above is currently redundant.
      return UnableToLegalize;
    } else {
      // Scalarize: one select per condition element.
      NarrowTy0 = DstTy.getElementType();
      NarrowTy1 = NarrowTy;
    }
  }

  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
  // A scalar condition is reused as-is for every piece; only a vector
  // condition needs splitting.
  if (CondTy.isVector())
    extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);

  extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);

  for (unsigned i = 0; i < NumParts; ++i) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
                           Src1Regs[i], Src2Regs[i]);
    DstRegs.push_back(DstReg);
  }

  // Reassemble: concat for vector pieces, build_vector for scalar pieces.
  if (NarrowTy0.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
3498 
/// Split a G_PHI into NarrowTy-sized phis (plus leftover-typed phis for an
/// uneven breakdown). The new phis are created in the original block; the
/// splitting of each incoming value is emitted before its predecessor's
/// terminator.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  const Register DstReg = MI.getOperand(0).getReg();
  LLT PhiTy = MRI.getType(DstReg);
  LLT LeftoverTy;

  // All of the operands need to have the same number of elements, so if we can
  // determine a type breakdown for the result type, we can for all of the
  // source types.
  int NumParts, NumLeftover;
  std::tie(NumParts, NumLeftover)
    = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
  if (NumParts < 0)
    return UnableToLegalize;

  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
  SmallVector<MachineInstrBuilder, 4> NewInsts;

  const int TotalNumParts = NumParts + NumLeftover;

  // Insert the new phis in the result block first. NarrowTy phis come first,
  // then the leftover-typed ones.
  for (int I = 0; I != TotalNumParts; ++I) {
    LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
    Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
    NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
                       .addDef(PartDstReg));
    if (I < NumParts)
      DstRegs.push_back(PartDstReg);
    else
      LeftoverDstRegs.push_back(PartDstReg);
  }

  // Rebuild the original wide value after the phi group (phis must stay at the
  // top of the block).
  MachineBasicBlock *MBB = MI.getParent();
  MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
  insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);

  SmallVector<Register, 4> PartRegs, LeftoverRegs;

  // Insert code to extract the incoming values in each predecessor block.
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
    PartRegs.clear();
    LeftoverRegs.clear();

    Register SrcReg = MI.getOperand(I).getReg();
    MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
    // The splits must dominate the predecessor's terminator.
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());

    LLT Unused;
    if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
                      LeftoverRegs))
      return UnableToLegalize;

    // Add the newly created operand splits to the existing instructions. The
    // odd-sized pieces are ordered after the requested NarrowTyArg sized
    // pieces.
    for (int J = 0; J != TotalNumParts; ++J) {
      MachineInstrBuilder MIB = NewInsts[J];
      MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
      MIB.addMBB(&OpMBB);
    }
  }

  MI.eraseFromParent();
  return Legalized;
}
3565 
/// Split a G_UNMERGE_VALUES with a wide source (TypeIdx 1) into a first
/// unmerge to GCD-typed pieces, followed by one unmerge per piece producing
/// the original destination registers.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
                                                  unsigned TypeIdx,
                                                  LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  // Operands [0, NumDst) are defs; operand NumDst is the single source.
  const int NumDst = MI.getNumOperands() - 1;
  const Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  // TODO: Create sequence of extracts.
  if (DstTy == NarrowTy)
    return UnableToLegalize;

  LLT GCDTy = getGCDType(SrcTy, NarrowTy);
  if (DstTy == GCDTy) {
    // This would just be a copy of the same unmerge.
    // TODO: Create extracts, pad with undef and create intermediate merges.
    return UnableToLegalize;
  }

  // First unmerge the source into GCD-typed pieces...
  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
  const int NumUnmerge = Unmerge->getNumOperands() - 1;
  const int PartsPerUnmerge = NumDst / NumUnmerge;

  // ...then unmerge each piece directly into the original def registers,
  // keeping their order.
  for (int I = 0; I != NumUnmerge; ++I) {
    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

    for (int J = 0; J != PartsPerUnmerge; ++J)
      MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
    MIB.addUse(Unmerge.getReg(I));
  }

  MI.eraseFromParent();
  return Legalized;
}
3605 
3606 // Handle FewerElementsVector a G_BUILD_VECTOR or G_CONCAT_VECTORS that produces
3607 // a vector
3608 //
3609 // Create a G_BUILD_VECTOR or G_CONCAT_VECTORS of NarrowTy pieces, padding with
3610 // undef as necessary.
3611 //
3612 // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
3613 //   -> <2 x s16>
3614 //
3615 // %4:_(s16) = G_IMPLICIT_DEF
3616 // %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
3617 // %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
3618 // %7:_(<2 x s16>) = G_IMPLICIT_DEF
3619 // %8:_(<6 x s16>) = G_CONCAT_VECTORS %5, %6, %7
3620 // %3:_(<3 x s16>), %8:_(<3 x s16>) = G_UNMERGE_VALUES %8
3621 LegalizerHelper::LegalizeResult
3622 LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
3623                                           LLT NarrowTy) {
3624   Register DstReg = MI.getOperand(0).getReg();
3625   LLT DstTy = MRI.getType(DstReg);
3626   LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
3627   LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
3628 
3629   // Break into a common type
3630   SmallVector<Register, 16> Parts;
3631   for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3632     extractGCDType(Parts, GCDTy, MI.getOperand(I).getReg());
3633 
3634   // Build the requested new merge, padding with undef.
3635   LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
3636                                   TargetOpcode::G_ANYEXT);
3637 
3638   // Pack into the original result register.
3639   buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
3640 
3641   MI.eraseFromParent();
3642   return Legalized;
3643 }
3644 
/// Narrow G_EXTRACT_VECTOR_ELT / G_INSERT_VECTOR_ELT by splitting the vector
/// into NarrowVecTy subvectors. Only constant indices can be narrowed; a
/// variable index falls back to a full stack-based lowering.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
                                                           unsigned TypeIdx,
                                                           LLT NarrowVecTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register InsertVal;
  bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;

  assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
  if (IsInsert)
    InsertVal = MI.getOperand(2).getReg();

  // The index is always the last operand for both opcodes.
  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();

  // TODO: Handle total scalarization case.
  if (!NarrowVecTy.isVector())
    return UnableToLegalize;

  LLT VecTy = MRI.getType(SrcVec);

  // If the index is a constant, we can really break this down as you would
  // expect, and index into the target size pieces.
  int64_t IdxVal;
  if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
    // Avoid out of bounds indexing the pieces.
    if (IdxVal >= VecTy.getNumElements()) {
      MIRBuilder.buildUndef(DstReg);
      MI.eraseFromParent();
      return Legalized;
    }

    SmallVector<Register, 8> VecParts;
    LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);

    // Build a sequence of NarrowTy pieces in VecParts for this operand.
    LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
                                    TargetOpcode::G_ANYEXT);

    unsigned NewNumElts = NarrowVecTy.getNumElements();

    // Translate the flat index into (subvector, index-within-subvector).
    LLT IdxTy = MRI.getType(Idx);
    int64_t PartIdx = IdxVal / NewNumElts;
    auto NewIdx =
        MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);

    if (IsInsert) {
      LLT PartTy = MRI.getType(VecParts[PartIdx]);

      // Use the adjusted index to insert into one of the subvectors.
      auto InsertPart = MIRBuilder.buildInsertVectorElement(
          PartTy, VecParts[PartIdx], InsertVal, NewIdx);
      VecParts[PartIdx] = InsertPart.getReg(0);

      // Recombine the inserted subvector with the others to reform the result
      // vector.
      buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
    } else {
      MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  // With a variable index, we can't perform the operation in a smaller type, so
  // we're forced to expand this.
  //
  // TODO: We could emit a chain of compare/select to figure out which piece to
  // index.
  return lowerExtractInsertVectorElt(MI);
}
3717 
/// Split a non-atomic G_LOAD/G_STORE into NarrowTy-sized memory accesses at
/// increasing offsets (plus leftover-typed accesses for an uneven breakdown),
/// then, for loads, recombine the pieces into the original value register.
LegalizerHelper::LegalizeResult
LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
                                      LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  MachineMemOperand *MMO = *MI.memoperands_begin();

  // This implementation doesn't work for atomics. Give up instead of doing
  // something invalid.
  if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
      MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
    return UnableToLegalize;

  bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
  Register ValReg = MI.getOperand(0).getReg();
  Register AddrReg = MI.getOperand(1).getReg();
  LLT ValTy = MRI.getType(ValReg);

  // FIXME: Do we need a distinct NarrowMemory legalize action?
  if (ValTy.getSizeInBits() != 8 * MMO->getSize()) {
    LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
    return UnableToLegalize;
  }

  int NumParts = -1;
  int NumLeftover = -1;
  LLT LeftoverTy;
  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
  if (IsLoad) {
    // For a load only the breakdown counts are needed; the piece registers
    // are created inside splitTypePieces below.
    std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
  } else {
    // For a store, split the value to be stored now.
    if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
                     NarrowLeftoverRegs)) {
      NumParts = NarrowRegs.size();
      NumLeftover = NarrowLeftoverRegs.size();
    }
  }

  if (NumParts == -1)
    return UnableToLegalize;

  LLT PtrTy = MRI.getType(AddrReg);
  const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());

  unsigned TotalSize = ValTy.getSizeInBits();

  // Split the load/store into PartTy sized pieces starting at Offset. If this
  // is a load, return the new registers in ValRegs. For a store, each elements
  // of ValRegs should be PartTy. Returns the next offset that needs to be
  // handled.
  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
                             unsigned Offset) -> unsigned {
    MachineFunction &MF = MIRBuilder.getMF();
    unsigned PartSize = PartTy.getSizeInBits();
    for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
         Offset += PartSize, ++Idx) {
      unsigned ByteSize = PartSize / 8;
      unsigned ByteOffset = Offset / 8;
      Register NewAddrReg;

      MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);

      // Derive a memory operand for this piece from the original one.
      MachineMemOperand *NewMMO =
        MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);

      if (IsLoad) {
        Register Dst = MRI.createGenericVirtualRegister(PartTy);
        ValRegs.push_back(Dst);
        MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
      } else {
        MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
      }
    }

    return Offset;
  };

  unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);

  // Handle the rest of the register if this isn't an even type breakdown.
  if (LeftoverTy.isValid())
    splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);

  // For a load, recombine the loaded pieces into the original register.
  if (IsLoad) {
    insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
                LeftoverTy, NarrowLeftoverRegs);
  }

  MI.eraseFromParent();
  return Legalized;
}
3811 
/// Split an operation with 1 def and 1-3 uses (all type index 0) into pieces.
/// Handles both reducing a vector's element count (NarrowTy has the same
/// scalar type) and narrowing the scalar type itself (in which case vectors
/// are bitcast to scalars first and back at the end).
LegalizerHelper::LegalizeResult
LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned int TypeIdx,
                                      LLT NarrowTy) {
  assert(TypeIdx == 0 && "only one type index expected");

  const unsigned Opc = MI.getOpcode();
  const int NumOps = MI.getNumOperands() - 1;
  const Register DstReg = MI.getOperand(0).getReg();
  const unsigned Flags = MI.getFlags();
  const unsigned NarrowSize = NarrowTy.getSizeInBits();
  const LLT NarrowScalarTy = LLT::scalar(NarrowSize);

  assert(NumOps <= 3 && "expected instruction with 1 result and 1-3 sources");

  // First of all check whether we are narrowing (changing the element type)
  // or reducing the vector elements
  const LLT DstTy = MRI.getType(DstReg);
  const bool IsNarrow = NarrowTy.getScalarType() != DstTy.getScalarType();

  // Per-source-operand piece registers (indexed by operand).
  SmallVector<Register, 8> ExtractedRegs[3];
  SmallVector<Register, 8> Parts;

  unsigned NarrowElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;

  // Break down all the sources into NarrowTy pieces we can operate on. This may
  // involve creating merges to a wider type, padded with undef.
  for (int I = 0; I != NumOps; ++I) {
    Register SrcReg = MI.getOperand(I + 1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);

    // The type to narrow SrcReg to. For narrowing, this is a smaller scalar.
    // For fewerElements, this is a smaller vector with the same element type.
    LLT OpNarrowTy;
    if (IsNarrow) {
      OpNarrowTy = NarrowScalarTy;

      // In case of narrowing, we need to cast vectors to scalars for this to
      // work properly
      // FIXME: Can we do without the bitcast here if we're narrowing?
      if (SrcTy.isVector()) {
        SrcTy = LLT::scalar(SrcTy.getSizeInBits());
        SrcReg = MIRBuilder.buildBitcast(SrcTy, SrcReg).getReg(0);
      }
    } else {
      OpNarrowTy = LLT::scalarOrVector(NarrowElts, SrcTy.getScalarType());
    }

    LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, OpNarrowTy, SrcReg);

    // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand.
    buildLCMMergePieces(SrcTy, OpNarrowTy, GCDTy, ExtractedRegs[I],
                        TargetOpcode::G_ANYEXT);
  }

  SmallVector<Register, 8> ResultRegs;

  // Input operands for each sub-instruction.
  SmallVector<SrcOp, 4> InputRegs(NumOps, Register());

  int NumParts = ExtractedRegs[0].size();
  const unsigned DstSize = DstTy.getSizeInBits();
  const LLT DstScalarTy = LLT::scalar(DstSize);

  // Narrowing needs to use scalar types
  LLT DstLCMTy, NarrowDstTy;
  if (IsNarrow) {
    DstLCMTy = getLCMType(DstScalarTy, NarrowScalarTy);
    NarrowDstTy = NarrowScalarTy;
  } else {
    DstLCMTy = getLCMType(DstTy, NarrowTy);
    NarrowDstTy = NarrowTy;
  }

  // We widened the source registers to satisfy merge/unmerge size
  // constraints. We'll have some extra fully undef parts.
  const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize;

  for (int I = 0; I != NumRealParts; ++I) {
    // Emit this instruction on each of the split pieces.
    for (int J = 0; J != NumOps; ++J)
      InputRegs[J] = ExtractedRegs[J][I];

    auto Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy}, InputRegs, Flags);
    ResultRegs.push_back(Inst.getReg(0));
  }

  // Fill out the widened result with undef instead of creating instructions
  // with undef inputs.
  int NumUndefParts = NumParts - NumRealParts;
  if (NumUndefParts != 0)
    ResultRegs.append(NumUndefParts,
                      MIRBuilder.buildUndef(NarrowDstTy).getReg(0));

  // Extract the possibly padded result. Use a scratch register if we need to do
  // a final bitcast, otherwise use the original result register.
  Register MergeDstReg;
  if (IsNarrow && DstTy.isVector())
    MergeDstReg = MRI.createGenericVirtualRegister(DstScalarTy);
  else
    MergeDstReg = DstReg;

  buildWidenedRemergeToDst(MergeDstReg, DstLCMTy, ResultRegs);

  // Recast to vector if we narrowed a vector
  if (IsNarrow && DstTy.isVector())
    MIRBuilder.buildBitcast(DstReg, MergeDstReg);

  MI.eraseFromParent();
  return Legalized;
}
3922 
3923 LegalizerHelper::LegalizeResult
3924 LegalizerHelper::fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx,
3925                                               LLT NarrowTy) {
3926   Register DstReg = MI.getOperand(0).getReg();
3927   Register SrcReg = MI.getOperand(1).getReg();
3928   int64_t Imm = MI.getOperand(2).getImm();
3929 
3930   LLT DstTy = MRI.getType(DstReg);
3931 
3932   SmallVector<Register, 8> Parts;
3933   LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
3934   LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts);
3935 
3936   for (Register &R : Parts)
3937     R = MIRBuilder.buildSExtInReg(NarrowTy, R, Imm).getReg(0);
3938 
3939   buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
3940 
3941   MI.eraseFromParent();
3942   return Legalized;
3943 }
3944 
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  using namespace TargetOpcode;

  // Dispatch table: pick the fewer-elements strategy appropriate for each
  // generic opcode. Unlisted opcodes are reported as unlegalizable.
  switch (MI.getOpcode()) {
  case G_IMPLICIT_DEF:
    return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
  // Operations handled by splitting every operand and the result with the
  // same element breakdown.
  case G_TRUNC:
  case G_AND:
  case G_OR:
  case G_XOR:
  case G_ADD:
  case G_SUB:
  case G_MUL:
  case G_PTR_ADD:
  case G_SMULH:
  case G_UMULH:
  case G_FADD:
  case G_FMUL:
  case G_FSUB:
  case G_FNEG:
  case G_FABS:
  case G_FCANONICALIZE:
  case G_FDIV:
  case G_FREM:
  case G_FMA:
  case G_FMAD:
  case G_FPOW:
  case G_FEXP:
  case G_FEXP2:
  case G_FLOG:
  case G_FLOG2:
  case G_FLOG10:
  case G_FNEARBYINT:
  case G_FCEIL:
  case G_FFLOOR:
  case G_FRINT:
  case G_INTRINSIC_ROUND:
  case G_INTRINSIC_ROUNDEVEN:
  case G_INTRINSIC_TRUNC:
  case G_FCOS:
  case G_FSIN:
  case G_FSQRT:
  case G_BSWAP:
  case G_BITREVERSE:
  case G_SDIV:
  case G_UDIV:
  case G_SREM:
  case G_UREM:
  case G_SMIN:
  case G_SMAX:
  case G_UMIN:
  case G_UMAX:
  case G_FMINNUM:
  case G_FMAXNUM:
  case G_FMINNUM_IEEE:
  case G_FMAXNUM_IEEE:
  case G_FMINIMUM:
  case G_FMAXIMUM:
  case G_FSHL:
  case G_FSHR:
  case G_FREEZE:
  case G_SADDSAT:
  case G_SSUBSAT:
  case G_UADDSAT:
  case G_USUBSAT:
    return reduceOperationWidth(MI, TypeIdx, NarrowTy);
  // Operations whose operands may have types distinct from the result type
  // (e.g. a separate shift-amount type), handled per element-group.
  case G_SHL:
  case G_LSHR:
  case G_ASHR:
  case G_SSHLSAT:
  case G_USHLSAT:
  case G_CTLZ:
  case G_CTLZ_ZERO_UNDEF:
  case G_CTTZ:
  case G_CTTZ_ZERO_UNDEF:
  case G_CTPOP:
  case G_FCOPYSIGN:
    return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
  // Conversions: source and destination element types differ.
  case G_ZEXT:
  case G_SEXT:
  case G_ANYEXT:
  case G_FPEXT:
  case G_FPTRUNC:
  case G_SITOFP:
  case G_UITOFP:
  case G_FPTOSI:
  case G_FPTOUI:
  case G_INTTOPTR:
  case G_PTRTOINT:
  case G_ADDRSPACE_CAST:
    return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
  case G_ICMP:
  case G_FCMP:
    return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
  case G_SELECT:
    return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
  case G_PHI:
    return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
  case G_UNMERGE_VALUES:
    return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
  case G_BUILD_VECTOR:
    assert(TypeIdx == 0 && "not a vector type index");
    return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
  case G_CONCAT_VECTORS:
    if (TypeIdx != 1) // TODO: This probably does work as expected already.
      return UnableToLegalize;
    return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
  case G_EXTRACT_VECTOR_ELT:
  case G_INSERT_VECTOR_ELT:
    return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
  case G_LOAD:
  case G_STORE:
    return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
  case G_SEXT_INREG:
    return fewerElementsVectorSextInReg(MI, TypeIdx, NarrowTy);
  default:
    return UnableToLegalize;
  }
}
4066 
// Expand a shift of a double-half-width value by a compile-time constant
// amount into operations on the two half-width pieces. Because the amount is
// known, the zero/short/half/long cases are resolved statically rather than
// with selects (contrast narrowScalarShift's general expansion).
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
                                             const LLT HalfTy, const LLT AmtTy) {

  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));

  // Shift by zero: forward the unchanged halves.
  if (Amt.isNullValue()) {
    MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH});
    MI.eraseFromParent();
    return Legalized;
  }

  LLT NVT = HalfTy;
  unsigned NVTBits = HalfTy.getSizeInBits();
  unsigned VTBits = 2 * NVTBits;

  SrcOp Lo(Register(0)), Hi(Register(0));
  if (MI.getOpcode() == TargetOpcode::G_SHL) {
    if (Amt.ugt(VTBits)) {
      // Amount exceeds the full width: all bits shifted out, produce zero.
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      // More than a half-width shift: Lo is zero, Hi comes from InL only.
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = MIRBuilder.buildShl(NVT, InL,
                               MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
    } else if (Amt == NVTBits) {
      // Exactly a half-width shift: the low half moves into the high half.
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = InL;
    } else {
      // General short shift: Hi = (InH << Amt) | (InL >> (NVTBits - Amt)).
      Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrLHS =
          MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrRHS = MIRBuilder.buildLShr(
          NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
      Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
    }
  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    if (Amt.ugt(VTBits)) {
      // Amount exceeds the full width: all bits shifted out, produce zero.
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      // More than a half-width shift: Lo comes from InH only, Hi is zero.
      Lo = MIRBuilder.buildLShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt == NVTBits) {
      // Exactly a half-width shift: the high half moves into the low half.
      Lo = InH;
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else {
      // General short shift: Lo = (InL >> Amt) | (InH << (NVTBits - Amt)).
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
    }
  } else {
    // G_ASHR: like G_LSHR but the vacated high bits are copies of the sign
    // bit, obtained by an arithmetic shift of InH by NVTBits - 1.
    if (Amt.ugt(VTBits)) {
      Hi = Lo = MIRBuilder.buildAShr(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt.ugt(NVTBits)) {
      Lo = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt == NVTBits) {
      Lo = InH;
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else {
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
    }
  }

  MIRBuilder.buildMerge(MI.getOperand(0), {Lo, Hi});
  MI.eraseFromParent();

  return Legalized;
}
4154 
4155 // TODO: Optimize if constant shift amount.
// Narrow a scalar shift to half the destination width, expanding it into
// operations on two half-width registers.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
                                   LLT RequestedTy) {
  // TypeIdx == 1 narrows only the shift-amount operand; the value type is
  // untouched.
  if (TypeIdx == 1) {
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, RequestedTy, 2);
    Observer.changedInstr(MI);
    return Legalized;
  }

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Amt = MI.getOperand(2).getReg();
  LLT ShiftAmtTy = MRI.getType(Amt);
  const unsigned DstEltSize = DstTy.getScalarSizeInBits();
  if (DstEltSize % 2 != 0)
    return UnableToLegalize;

  // Ignore the input type. We can only go to exactly half the size of the
  // input. If that isn't small enough, the resulting pieces will be further
  // legalized.
  const unsigned NewBitSize = DstEltSize / 2;
  const LLT HalfTy = LLT::scalar(NewBitSize);
  const LLT CondTy = LLT::scalar(1);

  // A known-constant amount admits a much simpler, select-free expansion.
  if (const MachineInstr *KShiftAmt =
          getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
    return narrowScalarShiftByConstant(
        MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
  }

  // TODO: Expand with known bits.

  // Handle the fully general expansion by an unknown amount.
  auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);

  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));

  // AmtExcess = Amt - NewBitSize: effective amount when the shift crosses the
  // half boundary. AmtLack = NewBitSize - Amt: amount used to move the bits
  // that cross between halves in the short case.
  auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
  auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);

  auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
  auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);

  Register ResultRegs[2];
  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL: {
    // Short: ShAmt < NewBitSize
    auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
    auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    auto LoL = MIRBuilder.buildConstant(HalfTy, 0);         // Lo part is zero.
    auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.

    // For Amt == 0 the short expression is not valid (AmtLack would equal
    // NewBitSize), so the zero case selects the original high half.
    auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
    auto Hi = MIRBuilder.buildSelect(
        HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    // Short: ShAmt < NewBitSize
    auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
    auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    MachineInstrBuilder HiL;
    if (MI.getOpcode() == TargetOpcode::G_LSHR) {
      HiL = MIRBuilder.buildConstant(HalfTy, 0);            // Hi part is zero.
    } else {
      auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
      HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt);    // Sign of Hi part.
    }
    auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
                                     {InH, AmtExcess});     // Lo from Hi part.

    // As above, Amt == 0 must pass the original low half through unchanged.
    auto Lo = MIRBuilder.buildSelect(
        HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));

    auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  default:
    llvm_unreachable("not a shift");
  }

  MIRBuilder.buildMerge(DstReg, ResultRegs);
  MI.eraseFromParent();
  return Legalized;
}
4265 
4266 LegalizerHelper::LegalizeResult
4267 LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
4268                                        LLT MoreTy) {
4269   assert(TypeIdx == 0 && "Expecting only Idx 0");
4270 
4271   Observer.changingInstr(MI);
4272   for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
4273     MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
4274     MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
4275     moreElementsVectorSrc(MI, MoreTy, I);
4276   }
4277 
4278   MachineBasicBlock &MBB = *MI.getParent();
4279   MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
4280   moreElementsVectorDst(MI, MoreTy, 0);
4281   Observer.changedInstr(MI);
4282   return Legalized;
4283 }
4284 
// Legalize by padding vector operands/results out to a wider legal vector
// type (MoreTy), dispatching on opcode.
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                    LLT MoreTy) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_IMPLICIT_DEF:
  case TargetOpcode::G_LOAD: {
    // Only the result needs widening.
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_STORE:
    // Only the stored value needs widening.
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  // Simple binary operations: widen both sources and the result uniformly.
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM: {
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INSERT:
  case TargetOpcode::G_FREEZE:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_SELECT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    // Vector conditions (vselect) are not handled here.
    if (MRI.getType(MI.getOperand(1).getReg()).isVector())
      return UnableToLegalize;

    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorSrc(MI, MoreTy, 3);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UNMERGE_VALUES: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    int NumDst = MI.getNumOperands() - 1;
    // Widen the source; the last operand of the unmerge is the source.
    moreElementsVectorSrc(MI, MoreTy, NumDst);

    // Rebuild the unmerge with the original destinations plus extra defs to
    // absorb the padding introduced by the wider source.
    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
    for (int I = 0; I != NumDst; ++I)
      MIB.addDef(MI.getOperand(I).getReg());

    int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits();
    for (int I = NumDst; I != NewNumDst; ++I)
      MIB.addDef(MRI.createGenericVirtualRegister(DstTy));

    MIB.addUse(MI.getOperand(NumDst).getReg());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_PHI:
    return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
  default:
    return UnableToLegalize;
  }
}
4380 
// Build a wide multiplication from NarrowTy-sized pieces using schoolbook
// (long) multiplication: DstRegs[k] accumulates the low halves of products
// Src1[k-i]*Src2[i], the high halves (via G_UMULH) of products contributing
// from position k-1, and the carries from position k-1's additions. DstRegs
// may have more elements than the sources (e.g. for full-product mulh).
void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
                                        ArrayRef<Register> Src1Regs,
                                        ArrayRef<Register> Src2Regs,
                                        LLT NarrowTy) {
  MachineIRBuilder &B = MIRBuilder;
  unsigned SrcParts = Src1Regs.size();
  unsigned DstParts = DstRegs.size();

  unsigned DstIdx = 0; // Low bits of the result.
  Register FactorSum =
      B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
  DstRegs[DstIdx] = FactorSum;

  // Carry accumulated while summing factors for the previous DstIdx; feeds
  // into the next position's sum. Unset until DstIdx 1 has been processed.
  unsigned CarrySumPrevDstIdx;
  SmallVector<Register, 4> Factors;

  for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
    // Collect low parts of muls for DstIdx.
    // The bounds clamp i so both Src1[DstIdx - i] and Src2[i] are in range.
    for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
         i <= std::min(DstIdx, SrcParts - 1); ++i) {
      MachineInstrBuilder Mul =
          B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
      Factors.push_back(Mul.getReg(0));
    }
    // Collect high parts of muls from previous DstIdx.
    for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
         i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
      MachineInstrBuilder Umulh =
          B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
      Factors.push_back(Umulh.getReg(0));
    }
    // Add CarrySum from additions calculated for previous DstIdx.
    if (DstIdx != 1) {
      Factors.push_back(CarrySumPrevDstIdx);
    }

    Register CarrySum;
    // Add all factors and accumulate all carries into CarrySum.
    if (DstIdx != DstParts - 1) {
      // Use overflow-reporting adds so the carries can be zero-extended and
      // summed for the next position.
      MachineInstrBuilder Uaddo =
          B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
      FactorSum = Uaddo.getReg(0);
      CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i) {
        MachineInstrBuilder Uaddo =
            B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
        FactorSum = Uaddo.getReg(0);
        MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
        CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
      }
    } else {
      // Since value for the next index is not calculated, neither is CarrySum.
      FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i)
        FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
    }

    CarrySumPrevDstIdx = CarrySum;
    DstRegs[DstIdx] = FactorSum;
    Factors.clear();
  }
}
4443 
4444 LegalizerHelper::LegalizeResult
4445 LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
4446                                     LLT NarrowTy) {
4447   if (TypeIdx != 0)
4448     return UnableToLegalize;
4449 
4450   Register DstReg = MI.getOperand(0).getReg();
4451   LLT DstType = MRI.getType(DstReg);
4452   // FIXME: add support for vector types
4453   if (DstType.isVector())
4454     return UnableToLegalize;
4455 
4456   uint64_t SizeOp0 = DstType.getSizeInBits();
4457   uint64_t NarrowSize = NarrowTy.getSizeInBits();
4458 
4459   // FIXME: add support for when SizeOp0 isn't an exact multiple of
4460   // NarrowSize.
4461   if (SizeOp0 % NarrowSize != 0)
4462     return UnableToLegalize;
4463 
4464   // Expand in terms of carry-setting/consuming G_<Op>E instructions.
4465   int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
4466 
4467   unsigned OpO, OpE;
4468   switch (MI.getOpcode()) {
4469   case TargetOpcode::G_ADD:
4470     OpO = TargetOpcode::G_UADDO;
4471     OpE = TargetOpcode::G_UADDE;
4472     break;
4473   case TargetOpcode::G_SUB:
4474     OpO = TargetOpcode::G_USUBO;
4475     OpE = TargetOpcode::G_USUBE;
4476     break;
4477   default:
4478     llvm_unreachable("Unexpected add/sub opcode!");
4479   }
4480 
4481   SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
4482   extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
4483   extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
4484 
4485   Register CarryIn;
4486   for (int i = 0; i < NumParts; ++i) {
4487     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
4488     Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
4489 
4490     if (i == 0)
4491       MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
4492                             {Src1Regs[i], Src2Regs[i]});
4493     else {
4494       MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
4495                             {Src1Regs[i], Src2Regs[i], CarryIn});
4496     }
4497 
4498     DstRegs.push_back(DstReg);
4499     CarryIn = CarryOut;
4500   }
4501   MIRBuilder.buildMerge(DstReg, DstRegs);
4502   MI.eraseFromParent();
4503   return Legalized;
4504 }
4505 
4506 LegalizerHelper::LegalizeResult
4507 LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
4508   Register DstReg = MI.getOperand(0).getReg();
4509   Register Src1 = MI.getOperand(1).getReg();
4510   Register Src2 = MI.getOperand(2).getReg();
4511 
4512   LLT Ty = MRI.getType(DstReg);
4513   if (Ty.isVector())
4514     return UnableToLegalize;
4515 
4516   unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
4517   unsigned DstSize = Ty.getSizeInBits();
4518   unsigned NarrowSize = NarrowTy.getSizeInBits();
4519   if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
4520     return UnableToLegalize;
4521 
4522   unsigned NumDstParts = DstSize / NarrowSize;
4523   unsigned NumSrcParts = SrcSize / NarrowSize;
4524   bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
4525   unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);
4526 
4527   SmallVector<Register, 2> Src1Parts, Src2Parts;
4528   SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
4529   extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
4530   extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
4531   multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
4532 
4533   // Take only high half of registers if this is high mul.
4534   ArrayRef<Register> DstRegs(
4535       IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
4536   MIRBuilder.buildMerge(DstReg, DstRegs);
4537   MI.eraseFromParent();
4538   return Legalized;
4539 }
4540 
4541 LegalizerHelper::LegalizeResult
4542 LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
4543                                      LLT NarrowTy) {
4544   if (TypeIdx != 1)
4545     return UnableToLegalize;
4546 
4547   uint64_t NarrowSize = NarrowTy.getSizeInBits();
4548 
4549   int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
4550   // FIXME: add support for when SizeOp1 isn't an exact multiple of
4551   // NarrowSize.
4552   if (SizeOp1 % NarrowSize != 0)
4553     return UnableToLegalize;
4554   int NumParts = SizeOp1 / NarrowSize;
4555 
4556   SmallVector<Register, 2> SrcRegs, DstRegs;
4557   SmallVector<uint64_t, 2> Indexes;
4558   extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
4559 
4560   Register OpReg = MI.getOperand(0).getReg();
4561   uint64_t OpStart = MI.getOperand(2).getImm();
4562   uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
4563   for (int i = 0; i < NumParts; ++i) {
4564     unsigned SrcStart = i * NarrowSize;
4565 
4566     if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
4567       // No part of the extract uses this subregister, ignore it.
4568       continue;
4569     } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
4570       // The entire subregister is extracted, forward the value.
4571       DstRegs.push_back(SrcRegs[i]);
4572       continue;
4573     }
4574 
4575     // OpSegStart is where this destination segment would start in OpReg if it
4576     // extended infinitely in both directions.
4577     int64_t ExtractOffset;
4578     uint64_t SegSize;
4579     if (OpStart < SrcStart) {
4580       ExtractOffset = 0;
4581       SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
4582     } else {
4583       ExtractOffset = OpStart - SrcStart;
4584       SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
4585     }
4586 
4587     Register SegReg = SrcRegs[i];
4588     if (ExtractOffset != 0 || SegSize != NarrowSize) {
4589       // A genuine extract is needed.
4590       SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
4591       MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
4592     }
4593 
4594     DstRegs.push_back(SegReg);
4595   }
4596 
4597   Register DstReg = MI.getOperand(0).getReg();
4598   if (MRI.getType(DstReg).isVector())
4599     MIRBuilder.buildBuildVector(DstReg, DstRegs);
4600   else if (DstRegs.size() > 1)
4601     MIRBuilder.buildMerge(DstReg, DstRegs);
4602   else
4603     MIRBuilder.buildCopy(DstReg, DstRegs[0]);
4604   MI.eraseFromParent();
4605   return Legalized;
4606 }
4607 
4608 LegalizerHelper::LegalizeResult
4609 LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
4610                                     LLT NarrowTy) {
4611   // FIXME: Don't know how to handle secondary types yet.
4612   if (TypeIdx != 0)
4613     return UnableToLegalize;
4614 
4615   uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4616   uint64_t NarrowSize = NarrowTy.getSizeInBits();
4617 
4618   // FIXME: add support for when SizeOp0 isn't an exact multiple of
4619   // NarrowSize.
4620   if (SizeOp0 % NarrowSize != 0)
4621     return UnableToLegalize;
4622 
4623   int NumParts = SizeOp0 / NarrowSize;
4624 
4625   SmallVector<Register, 2> SrcRegs, DstRegs;
4626   SmallVector<uint64_t, 2> Indexes;
4627   extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
4628 
4629   Register OpReg = MI.getOperand(2).getReg();
4630   uint64_t OpStart = MI.getOperand(3).getImm();
4631   uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
4632   for (int i = 0; i < NumParts; ++i) {
4633     unsigned DstStart = i * NarrowSize;
4634 
4635     if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
4636       // No part of the insert affects this subregister, forward the original.
4637       DstRegs.push_back(SrcRegs[i]);
4638       continue;
4639     } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
4640       // The entire subregister is defined by this insert, forward the new
4641       // value.
4642       DstRegs.push_back(OpReg);
4643       continue;
4644     }
4645 
4646     // OpSegStart is where this destination segment would start in OpReg if it
4647     // extended infinitely in both directions.
4648     int64_t ExtractOffset, InsertOffset;
4649     uint64_t SegSize;
4650     if (OpStart < DstStart) {
4651       InsertOffset = 0;
4652       ExtractOffset = DstStart - OpStart;
4653       SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
4654     } else {
4655       InsertOffset = OpStart - DstStart;
4656       ExtractOffset = 0;
4657       SegSize =
4658         std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
4659     }
4660 
4661     Register SegReg = OpReg;
4662     if (ExtractOffset != 0 || SegSize != OpSize) {
4663       // A genuine extract is needed.
4664       SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
4665       MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
4666     }
4667 
4668     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
4669     MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
4670     DstRegs.push_back(DstReg);
4671   }
4672 
4673   assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
4674   Register DstReg = MI.getOperand(0).getReg();
4675   if(MRI.getType(DstReg).isVector())
4676     MIRBuilder.buildBuildVector(DstReg, DstRegs);
4677   else
4678     MIRBuilder.buildMerge(DstReg, DstRegs);
4679   MI.eraseFromParent();
4680   return Legalized;
4681 }
4682 
4683 LegalizerHelper::LegalizeResult
4684 LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
4685                                    LLT NarrowTy) {
4686   Register DstReg = MI.getOperand(0).getReg();
4687   LLT DstTy = MRI.getType(DstReg);
4688 
4689   assert(MI.getNumOperands() == 3 && TypeIdx == 0);
4690 
4691   SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
4692   SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
4693   SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
4694   LLT LeftoverTy;
4695   if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
4696                     Src0Regs, Src0LeftoverRegs))
4697     return UnableToLegalize;
4698 
4699   LLT Unused;
4700   if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
4701                     Src1Regs, Src1LeftoverRegs))
4702     llvm_unreachable("inconsistent extractParts result");
4703 
4704   for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
4705     auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
4706                                         {Src0Regs[I], Src1Regs[I]});
4707     DstRegs.push_back(Inst.getReg(0));
4708   }
4709 
4710   for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
4711     auto Inst = MIRBuilder.buildInstr(
4712       MI.getOpcode(),
4713       {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
4714     DstLeftoverRegs.push_back(Inst.getReg(0));
4715   }
4716 
4717   insertParts(DstReg, DstTy, NarrowTy, DstRegs,
4718               LeftoverTy, DstLeftoverRegs);
4719 
4720   MI.eraseFromParent();
4721   return Legalized;
4722 }
4723 
4724 LegalizerHelper::LegalizeResult
4725 LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
4726                                  LLT NarrowTy) {
4727   if (TypeIdx != 0)
4728     return UnableToLegalize;
4729 
4730   Register DstReg = MI.getOperand(0).getReg();
4731   Register SrcReg = MI.getOperand(1).getReg();
4732 
4733   LLT DstTy = MRI.getType(DstReg);
4734   if (DstTy.isVector())
4735     return UnableToLegalize;
4736 
4737   SmallVector<Register, 8> Parts;
4738   LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
4739   LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
4740   buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
4741 
4742   MI.eraseFromParent();
4743   return Legalized;
4744 }
4745 
4746 LegalizerHelper::LegalizeResult
4747 LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
4748                                     LLT NarrowTy) {
4749   if (TypeIdx != 0)
4750     return UnableToLegalize;
4751 
4752   Register CondReg = MI.getOperand(1).getReg();
4753   LLT CondTy = MRI.getType(CondReg);
4754   if (CondTy.isVector()) // TODO: Handle vselect
4755     return UnableToLegalize;
4756 
4757   Register DstReg = MI.getOperand(0).getReg();
4758   LLT DstTy = MRI.getType(DstReg);
4759 
4760   SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
4761   SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
4762   SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
4763   LLT LeftoverTy;
4764   if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
4765                     Src1Regs, Src1LeftoverRegs))
4766     return UnableToLegalize;
4767 
4768   LLT Unused;
4769   if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
4770                     Src2Regs, Src2LeftoverRegs))
4771     llvm_unreachable("inconsistent extractParts result");
4772 
4773   for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
4774     auto Select = MIRBuilder.buildSelect(NarrowTy,
4775                                          CondReg, Src1Regs[I], Src2Regs[I]);
4776     DstRegs.push_back(Select.getReg(0));
4777   }
4778 
4779   for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
4780     auto Select = MIRBuilder.buildSelect(
4781       LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
4782     DstLeftoverRegs.push_back(Select.getReg(0));
4783   }
4784 
4785   insertParts(DstReg, DstTy, NarrowTy, DstRegs,
4786               LeftoverTy, DstLeftoverRegs);
4787 
4788   MI.eraseFromParent();
4789   return Legalized;
4790 }
4791 
4792 LegalizerHelper::LegalizeResult
4793 LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
4794                                   LLT NarrowTy) {
4795   if (TypeIdx != 1)
4796     return UnableToLegalize;
4797 
4798   Register DstReg = MI.getOperand(0).getReg();
4799   Register SrcReg = MI.getOperand(1).getReg();
4800   LLT DstTy = MRI.getType(DstReg);
4801   LLT SrcTy = MRI.getType(SrcReg);
4802   unsigned NarrowSize = NarrowTy.getSizeInBits();
4803 
4804   if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
4805     const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
4806 
4807     MachineIRBuilder &B = MIRBuilder;
4808     auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
4809     // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
4810     auto C_0 = B.buildConstant(NarrowTy, 0);
4811     auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
4812                                 UnmergeSrc.getReg(1), C_0);
4813     auto LoCTLZ = IsUndef ?
4814       B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
4815       B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
4816     auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
4817     auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
4818     auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
4819     B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
4820 
4821     MI.eraseFromParent();
4822     return Legalized;
4823   }
4824 
4825   return UnableToLegalize;
4826 }
4827 
4828 LegalizerHelper::LegalizeResult
4829 LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
4830                                   LLT NarrowTy) {
4831   if (TypeIdx != 1)
4832     return UnableToLegalize;
4833 
4834   Register DstReg = MI.getOperand(0).getReg();
4835   Register SrcReg = MI.getOperand(1).getReg();
4836   LLT DstTy = MRI.getType(DstReg);
4837   LLT SrcTy = MRI.getType(SrcReg);
4838   unsigned NarrowSize = NarrowTy.getSizeInBits();
4839 
4840   if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
4841     const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
4842 
4843     MachineIRBuilder &B = MIRBuilder;
4844     auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
4845     // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
4846     auto C_0 = B.buildConstant(NarrowTy, 0);
4847     auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
4848                                 UnmergeSrc.getReg(0), C_0);
4849     auto HiCTTZ = IsUndef ?
4850       B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
4851       B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
4852     auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
4853     auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
4854     auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
4855     B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
4856 
4857     MI.eraseFromParent();
4858     return Legalized;
4859   }
4860 
4861   return UnableToLegalize;
4862 }
4863 
4864 LegalizerHelper::LegalizeResult
4865 LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
4866                                    LLT NarrowTy) {
4867   if (TypeIdx != 1)
4868     return UnableToLegalize;
4869 
4870   Register DstReg = MI.getOperand(0).getReg();
4871   LLT DstTy = MRI.getType(DstReg);
4872   LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
4873   unsigned NarrowSize = NarrowTy.getSizeInBits();
4874 
4875   if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
4876     auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
4877 
4878     auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
4879     auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
4880     MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
4881 
4882     MI.eraseFromParent();
4883     return Legalized;
4884   }
4885 
4886   return UnableToLegalize;
4887 }
4888 
// Expand a bit-counting operation (G_CTLZ/G_CTTZ, their ZERO_UNDEF variants,
// and G_CTPOP) into operations the target does support.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  const auto &TII = MIRBuilder.getTII();
  // An expansion is only worthwhile if the target handles the instruction we
  // would produce, either natively, via libcall, or via custom lowering.
  auto isSupported = [this](const LegalityQuery &Q) {
    auto QAction = LI.getAction(Q).Action;
    return QAction == Legal || QAction == Libcall || QAction == Custom;
  };
  switch (Opc) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // This trivially expands to CTLZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTLZ: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    LLT SrcTy = MRI.getType(SrcReg);
    unsigned Len = SrcTy.getSizeInBits();

    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
      // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
      auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
      auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
      auto ICmp = MIRBuilder.buildICmp(
          CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
      auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
      MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we do this:
    // NewLen = NextPowerOf2(Len);
    // x = x | (x >> 1);
    // x = x | (x >> 2);
    // ...
    // x = x | (x >>16);
    // x = x | (x >>32); // for 64-bit input
    // Upto NewLen/2
    // return Len - popcount(x);
    //
    // Ref: "Hacker's Delight" by Henry Warren
    Register Op = SrcReg;
    unsigned NewLen = PowerOf2Ceil(Len);
    // Smear the leading set bit rightward so every position below it is set;
    // then Len - popcount(x) equals the number of leading zeros.
    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
      auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
      auto MIBOp = MIRBuilder.buildOr(
          SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
      Op = MIBOp.getReg(0);
    }
    auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
    MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
                        MIBPop);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    // This trivially expands to CTTZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTTZ: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    LLT SrcTy = MRI.getType(SrcReg);

    unsigned Len = SrcTy.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
      // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
      // zero.
      auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
      auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
      auto ICmp = MIRBuilder.buildICmp(
          CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
      auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
      MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we use: { return popcount(~x & (x - 1)); }
    // unless the target has ctlz but not ctpop, in which case we use:
    // { return 32 - nlz(~x & (x-1)); }
    // Ref: "Hacker's Delight" by Henry Warren
    auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
    auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
    // ~x & (x - 1) has exactly the trailing zeros of x set.
    auto MIBTmp = MIRBuilder.buildAnd(
        SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
    if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
        isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
      auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
      MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
                          MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
      MI.eraseFromParent();
      return Legalized;
    }
    // Reuse this instruction as a G_CTPOP of the mask computed above.
    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
    MI.getOperand(1).setReg(MIBTmp.getReg(0));
    return Legalized;
  }
  case TargetOpcode::G_CTPOP: {
    Register SrcReg = MI.getOperand(1).getReg();
    LLT Ty = MRI.getType(SrcReg);
    unsigned Size = Ty.getSizeInBits();
    MachineIRBuilder &B = MIRBuilder;

    // Count set bits in blocks of 2 bits. Default approach would be
    // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
    // We use following formula instead:
    // B2Count = val - { (val >> 1) & 0x55555555 }
    // since it gives same result in blocks of 2 with one instruction less.
    auto C_1 = B.buildConstant(Ty, 1);
    auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
    APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
    auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
    auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
    auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);

    // In order to get count in blocks of 4 add values from adjacent block of 2.
    // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
    auto C_2 = B.buildConstant(Ty, 2);
    auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
    APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
    auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
    auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
    auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
    auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);

    // For count in blocks of 8 bits we don't have to mask high 4 bits before
    // addition since count value sits in range {0,...,8} and 4 bits are enough
    // to hold such binary values. After addition high 4 bits still hold count
    // of set bits in high 4 bit block, set them to zero and get 8 bit result.
    // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
    auto C_4 = B.buildConstant(Ty, 4);
    auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
    auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
    APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
    auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
    auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);

    assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
    // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
    // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
    auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
    auto ResTmp = B.buildMul(Ty, B8Count, MulMask);

    // Shift count result from 8 high bits to low bits.
    auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
    B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);

    MI.eraseFromParent();
    return Legalized;
  }
  }
}
5051 
// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
// representation.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);

  // The expansion mirrors this reference implementation:
  //
  // unsigned cul2f(ulong u) {
  //   uint lz = clz(u);
  //   uint e = (u != 0) ? 127U + 63U - lz : 0;
  //   u = (u << lz) & 0x7fffffffffffffffUL;
  //   ulong t = u & 0xffffffffffUL;
  //   uint v = (e << 23) | (uint)(u >> 40);
  //   uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
  //   return as_float(v + r);
  // }

  auto Zero32 = MIRBuilder.buildConstant(S32, 0);
  auto Zero64 = MIRBuilder.buildConstant(S64, 0);

  // lz = clz(u). Zero input is compensated below by forcing the exponent to
  // 0, so the zero-undef form is acceptable here.
  auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);

  // e = (u != 0) ? 127 + 63 - lz : 0 — the biased f32 exponent.
  auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
  auto Sub = MIRBuilder.buildSub(S32, K, LZ);

  auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
  auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);

  // u = (u << lz) & 0x7fffffffffffffff — normalize and drop the leading bit.
  auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
  auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);

  auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);

  // t = low 40 bits of u — the bits discarded from the 23-bit mantissa,
  // kept for rounding below.
  auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
  auto T = MIRBuilder.buildAnd(S64, U, Mask1);

  // v = (e << 23) | (uint)(u >> 40) — assembled exponent and mantissa.
  auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
  auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
  auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));

  // Round to nearest, ties to even: r = 1 if t is above the halfway point
  // (0x8000000000), v & 1 if exactly halfway, 0 otherwise.
  auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
  auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
  auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
  auto One = MIRBuilder.buildConstant(S32, 1);

  auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
  auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
  auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
  MIRBuilder.buildAdd(Dst, V, R);

  MI.eraseFromParent();
  return Legalized;
}
5110 
5111 LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
5112   Register Dst = MI.getOperand(0).getReg();
5113   Register Src = MI.getOperand(1).getReg();
5114   LLT DstTy = MRI.getType(Dst);
5115   LLT SrcTy = MRI.getType(Src);
5116 
5117   if (SrcTy == LLT::scalar(1)) {
5118     auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
5119     auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
5120     MIRBuilder.buildSelect(Dst, Src, True, False);
5121     MI.eraseFromParent();
5122     return Legalized;
5123   }
5124 
5125   if (SrcTy != LLT::scalar(64))
5126     return UnableToLegalize;
5127 
5128   if (DstTy == LLT::scalar(32)) {
5129     // TODO: SelectionDAG has several alternative expansions to port which may
5130     // be more reasonble depending on the available instructions. If a target
5131     // has sitofp, does not have CTLZ, or can efficiently use f64 as an
5132     // intermediate type, this is probably worse.
5133     return lowerU64ToF32BitOps(MI);
5134   }
5135 
5136   return UnableToLegalize;
5137 }
5138 
5139 LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
5140   Register Dst = MI.getOperand(0).getReg();
5141   Register Src = MI.getOperand(1).getReg();
5142   LLT DstTy = MRI.getType(Dst);
5143   LLT SrcTy = MRI.getType(Src);
5144 
5145   const LLT S64 = LLT::scalar(64);
5146   const LLT S32 = LLT::scalar(32);
5147   const LLT S1 = LLT::scalar(1);
5148 
5149   if (SrcTy == S1) {
5150     auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
5151     auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
5152     MIRBuilder.buildSelect(Dst, Src, True, False);
5153     MI.eraseFromParent();
5154     return Legalized;
5155   }
5156 
5157   if (SrcTy != S64)
5158     return UnableToLegalize;
5159 
5160   if (DstTy == S32) {
5161     // signed cl2f(long l) {
5162     //   long s = l >> 63;
5163     //   float r = cul2f((l + s) ^ s);
5164     //   return s ? -r : r;
5165     // }
5166     Register L = Src;
5167     auto SignBit = MIRBuilder.buildConstant(S64, 63);
5168     auto S = MIRBuilder.buildAShr(S64, L, SignBit);
5169 
5170     auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
5171     auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
5172     auto R = MIRBuilder.buildUITOFP(S32, Xor);
5173 
5174     auto RNeg = MIRBuilder.buildFNeg(S32, R);
5175     auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
5176                                             MIRBuilder.buildConstant(S64, 0));
5177     MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
5178     MI.eraseFromParent();
5179     return Legalized;
5180   }
5181 
5182   return UnableToLegalize;
5183 }
5184 
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  // Only s32/s64 source and destination types are handled.
  if (SrcTy != S64 && SrcTy != S32)
    return UnableToLegalize;
  if (DstTy != S32 && DstTy != S64)
    return UnableToLegalize;

  // FPTOSI gives same result as FPTOUI for positive signed integers.
  // FPTOUI needs to deal with fp values that convert to unsigned integers
  // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.

  // Build 2^Exp as an fp constant of the source type. getSignMask yields the
  // integer 2^(DstBits - 1), converted here to the fp threshold.
  APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
  APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
                                                : APFloat::IEEEdouble(),
                    APInt::getNullValue(SrcTy.getSizeInBits()));
  TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);

  // Result used when the value fits in the signed range.
  MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);

  MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
  // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
  // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
  MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
  MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
  MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
  MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);

  const LLT S1 = LLT::scalar(1);

  // Take the direct FPTOSI path when Src < Threshold (FCMP_ULT is also true
  // for NaN input, selecting the simple path in that case).
  MachineInstrBuilder FCMP =
      MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
  MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);

  MI.eraseFromParent();
  return Legalized;
}
5227 
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
    return UnableToLegalize;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c

  unsigned SrcEltBits = SrcTy.getScalarSizeInBits();

  // Extract the biased exponent field (bits 30:23 of an IEEE f32).
  auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
  auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);

  auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
  auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);

  // Sign = all-ones when the source is negative, all-zeros otherwise
  // (arithmetic shift of the isolated sign bit), widened to the dest type.
  auto SignMask = MIRBuilder.buildConstant(SrcTy,
                                           APInt::getSignMask(SrcEltBits));
  auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
  auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
  auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
  Sign = MIRBuilder.buildSExt(DstTy, Sign);

  // R = mantissa with the implicit leading one (0x00800000) restored.
  auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
  auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
  auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);

  auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
  R = MIRBuilder.buildZExt(DstTy, R);

  // Unbias the exponent (f32 bias is 127) and compute the shift distances
  // for both directions; only one of Shl/Srl is selected below.
  auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
  auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
  auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
  auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);

  auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
  auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);

  const LLT S1 = LLT::scalar(1);
  // Shift left when the exponent exceeds the mantissa width (23), right
  // otherwise.
  auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
                                    S1, Exponent, ExponentLoBit);

  R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);

  // Apply the sign: (R ^ Sign) - Sign negates R when Sign is all-ones.
  auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
  auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);

  auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);

  // A negative unbiased exponent means |Src| < 1, which truncates to zero.
  auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
                                          S1, Exponent, ZeroSrcTy);

  auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
  MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);

  MI.eraseFromParent();
  return Legalized;
}
5294 
// f64 -> f16 conversion using round-to-nearest-even rounding mode.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();

  if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
    return UnableToLegalize;

  const unsigned ExpMask = 0x7ff;
  const unsigned ExpBiasf64 = 1023;
  const unsigned ExpBiasf16 = 15;
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  // Split the f64 bits into low (U) and high (UH) 32-bit halves.
  auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
  Register U = Unmerge.getReg(0);
  Register UH = Unmerge.getReg(1);

  // E = biased f64 exponent (bits 30:20 of the high half).
  auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
  E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));

  // Subtract the fp64 exponent bias (1023) to get the real exponent and
  // add the f16 bias (15) to get the biased exponent for the f16 format.
  E = MIRBuilder.buildAdd(
    S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));

  // M = high mantissa bits, positioned for the f16 mantissa (plus one
  // rounding bit in bit 0's neighborhood).
  auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
  M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));

  // Collapse the discarded low mantissa bits (low 9 of UH plus all of U)
  // into a sticky bit so rounding and NaN detection still observe them.
  auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
                                       MIRBuilder.buildConstant(S32, 0x1ff));
  MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);

  auto Zero = MIRBuilder.buildConstant(S32, 0);
  auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
  auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
  M = MIRBuilder.buildOr(S32, M, Lo40Set);

  // I = the f16 pattern to use when the source is inf/NaN:
  // (M != 0 ? 0x0200 : 0) | 0x7c00;
  auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
  auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
  auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);

  auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
  auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);

  // N = M | (E << 12); pre-rounding pattern for a normal f16 result.
  auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
  auto N = MIRBuilder.buildOr(S32, M, EShl12);

  // B = clamp(1-E, 0, 13); denormalization shift for subnormal results.
  auto One = MIRBuilder.buildConstant(S32, 1);
  auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
  auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
  B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));

  // D = subnormal mantissa: restore the implicit bit (0x1000), shift right
  // by B, and fold any shifted-out bits into a sticky bit.
  auto SigSetHigh = MIRBuilder.buildOr(S32, M,
                                       MIRBuilder.buildConstant(S32, 0x1000));

  auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
  auto D0 = MIRBuilder.buildShl(S32, D, B);

  auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
                                             D0, SigSetHigh);
  auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
  D = MIRBuilder.buildOr(S32, D, D1);

  // Use the subnormal path when E < 1, the normal pattern otherwise.
  auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
  auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);

  // Round to nearest, ties to even, using the low bits as guard/sticky.
  auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
  V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));

  auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
                                       MIRBuilder.buildConstant(S32, 3));
  auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);

  auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
                                       MIRBuilder.buildConstant(S32, 5));
  auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);

  V1 = MIRBuilder.buildOr(S32, V0, V1);
  V = MIRBuilder.buildAdd(S32, V, V1);

  // Exponents above the f16 range overflow to infinity (0x7c00).
  auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,  S1,
                                       E, MIRBuilder.buildConstant(S32, 30));
  V = MIRBuilder.buildSelect(S32, CmpEGt30,
                             MIRBuilder.buildConstant(S32, 0x7c00), V);

  // E == 1039 (i.e. raw f64 exponent 2047 after rebiasing) marks an f64
  // inf/NaN source; substitute the I pattern built above.
  auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
                                         E, MIRBuilder.buildConstant(S32, 1039));
  V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);

  // Extract the sign bit.
  auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
  Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));

  // Insert the sign bit
  V = MIRBuilder.buildOr(S32, Sign, V);

  MIRBuilder.buildTrunc(Dst, V);
  MI.eraseFromParent();
  return Legalized;
}
5400 
5401 LegalizerHelper::LegalizeResult
5402 LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
5403   Register Dst = MI.getOperand(0).getReg();
5404   Register Src = MI.getOperand(1).getReg();
5405 
5406   LLT DstTy = MRI.getType(Dst);
5407   LLT SrcTy = MRI.getType(Src);
5408   const LLT S64 = LLT::scalar(64);
5409   const LLT S16 = LLT::scalar(16);
5410 
5411   if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
5412     return lowerFPTRUNC_F64_TO_F16(MI);
5413 
5414   return UnableToLegalize;
5415 }
5416 
5417 // TODO: If RHS is a constant SelectionDAGBuilder expands this into a
5418 // multiplication tree.
5419 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
5420   Register Dst = MI.getOperand(0).getReg();
5421   Register Src0 = MI.getOperand(1).getReg();
5422   Register Src1 = MI.getOperand(2).getReg();
5423   LLT Ty = MRI.getType(Dst);
5424 
5425   auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
5426   MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
5427   MI.eraseFromParent();
5428   return Legalized;
5429 }
5430 
5431 static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
5432   switch (Opc) {
5433   case TargetOpcode::G_SMIN:
5434     return CmpInst::ICMP_SLT;
5435   case TargetOpcode::G_SMAX:
5436     return CmpInst::ICMP_SGT;
5437   case TargetOpcode::G_UMIN:
5438     return CmpInst::ICMP_ULT;
5439   case TargetOpcode::G_UMAX:
5440     return CmpInst::ICMP_UGT;
5441   default:
5442     llvm_unreachable("not in integer min/max");
5443   }
5444 }
5445 
5446 LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
5447   Register Dst = MI.getOperand(0).getReg();
5448   Register Src0 = MI.getOperand(1).getReg();
5449   Register Src1 = MI.getOperand(2).getReg();
5450 
5451   const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
5452   LLT CmpType = MRI.getType(Dst).changeElementSize(1);
5453 
5454   auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
5455   MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
5456 
5457   MI.eraseFromParent();
5458   return Legalized;
5459 }
5460 
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();

  const LLT Src0Ty = MRI.getType(Src0);
  const LLT Src1Ty = MRI.getType(Src1);

  const int Src0Size = Src0Ty.getScalarSizeInBits();
  const int Src1Size = Src1Ty.getScalarSizeInBits();

  // Mask selecting only the sign bit in the magnitude operand's type.
  auto SignBitMask = MIRBuilder.buildConstant(
    Src0Ty, APInt::getSignMask(Src0Size));

  // Mask selecting everything except the sign bit.
  auto NotSignBitMask = MIRBuilder.buildConstant(
    Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));

  // And0 = Src0 with its sign cleared; And1 = the sign bit taken from Src1,
  // moved into Src0's sign-bit position when the widths differ.
  Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
  Register And1;
  if (Src0Ty == Src1Ty) {
    And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
  } else if (Src0Size > Src1Size) {
    // Widen Src1, then shift its sign bit up into Src0's sign position.
    auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
    auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
    auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
    And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
  } else {
    // Shift Src1's sign bit down, then truncate to Src0's width.
    auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
    auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
    auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
    And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
  }

  // Be careful about setting nsz/nnan/ninf on every instruction, since the
  // constants are a nan and -0.0, but the final result should preserve
  // everything.
  unsigned Flags = MI.getFlags();
  MIRBuilder.buildOr(Dst, And0, And1, Flags);

  MI.eraseFromParent();
  return Legalized;
}
5504 
5505 LegalizerHelper::LegalizeResult
5506 LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
5507   unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
5508     TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
5509 
5510   Register Dst = MI.getOperand(0).getReg();
5511   Register Src0 = MI.getOperand(1).getReg();
5512   Register Src1 = MI.getOperand(2).getReg();
5513   LLT Ty = MRI.getType(Dst);
5514 
5515   if (!MI.getFlag(MachineInstr::FmNoNans)) {
5516     // Insert canonicalizes if it's possible we need to quiet to get correct
5517     // sNaN behavior.
5518 
5519     // Note this must be done here, and not as an optimization combine in the
5520     // absence of a dedicate quiet-snan instruction as we're using an
5521     // omni-purpose G_FCANONICALIZE.
5522     if (!isKnownNeverSNaN(Src0, MRI))
5523       Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
5524 
5525     if (!isKnownNeverSNaN(Src1, MRI))
5526       Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
5527   }
5528 
5529   // If there are no nans, it's safe to simply replace this with the non-IEEE
5530   // version.
5531   MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
5532   MI.eraseFromParent();
5533   return Legalized;
5534 }
5535 
5536 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
5537   // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
5538   Register DstReg = MI.getOperand(0).getReg();
5539   LLT Ty = MRI.getType(DstReg);
5540   unsigned Flags = MI.getFlags();
5541 
5542   auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
5543                                   Flags);
5544   MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
5545   MI.eraseFromParent();
5546   return Legalized;
5547 }
5548 
5549 LegalizerHelper::LegalizeResult
5550 LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
5551   Register DstReg = MI.getOperand(0).getReg();
5552   Register X = MI.getOperand(1).getReg();
5553   const unsigned Flags = MI.getFlags();
5554   const LLT Ty = MRI.getType(DstReg);
5555   const LLT CondTy = Ty.changeElementSize(1);
5556 
5557   // round(x) =>
5558   //  t = trunc(x);
5559   //  d = fabs(x - t);
5560   //  o = copysign(1.0f, x);
5561   //  return t + (d >= 0.5 ? o : 0.0);
5562 
5563   auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
5564 
5565   auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
5566   auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
5567   auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
5568   auto One = MIRBuilder.buildFConstant(Ty, 1.0);
5569   auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
5570   auto SignOne = MIRBuilder.buildFCopysign(Ty, One, X);
5571 
5572   auto Cmp = MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half,
5573                                   Flags);
5574   auto Sel = MIRBuilder.buildSelect(Ty, Cmp, SignOne, Zero, Flags);
5575 
5576   MIRBuilder.buildFAdd(DstReg, T, Sel, Flags);
5577 
5578   MI.eraseFromParent();
5579   return Legalized;
5580 }
5581 
5582 LegalizerHelper::LegalizeResult
5583 LegalizerHelper::lowerFFloor(MachineInstr &MI) {
5584   Register DstReg = MI.getOperand(0).getReg();
5585   Register SrcReg = MI.getOperand(1).getReg();
5586   unsigned Flags = MI.getFlags();
5587   LLT Ty = MRI.getType(DstReg);
5588   const LLT CondTy = Ty.changeElementSize(1);
5589 
5590   // result = trunc(src);
5591   // if (src < 0.0 && src != result)
5592   //   result += -1.0.
5593 
5594   auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
5595   auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
5596 
5597   auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
5598                                   SrcReg, Zero, Flags);
5599   auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
5600                                       SrcReg, Trunc, Flags);
5601   auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
5602   auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
5603 
5604   MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
5605   MI.eraseFromParent();
5606   return Legalized;
5607 }
5608 
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
  // Lower G_MERGE_VALUES by zero-extending each source part into a wide
  // scalar, shifting it to its bit offset, and OR-ing the parts together.
  const unsigned NumOps = MI.getNumOperands();
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(Src0Reg);
  unsigned PartSize = SrcTy.getSizeInBits();

  // Accumulate into a scalar of the full destination width; part 0 occupies
  // the low bits and needs no shift.
  LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
  Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);

  for (unsigned I = 2; I != NumOps; ++I) {
    const unsigned Offset = (I - 1) * PartSize;

    Register SrcReg = MI.getOperand(I).getReg();
    auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);

    // The final OR can define DstReg directly when no int-to-ptr cast will be
    // needed afterwards; otherwise accumulate into a fresh temporary.
    Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
      MRI.createGenericVirtualRegister(WideTy);

    auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
    auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
    MIRBuilder.buildOr(NextResult, ResultReg, Shl);
    ResultReg = NextResult;
  }

  if (DstTy.isPointer()) {
    // Pointers in non-integral address spaces cannot be formed from integers.
    if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
          DstTy.getAddressSpace())) {
      LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
      return UnableToLegalize;
    }

    MIRBuilder.buildIntToPtr(DstReg, ResultReg);
  }

  MI.eraseFromParent();
  return Legalized;
}
5649 
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
  // Lower G_UNMERGE_VALUES by viewing the source as a single wide integer and
  // extracting each destination with a logical shift right plus truncate.
  const unsigned NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
  if (DstTy.isPointer())
    return UnableToLegalize; // TODO

  SrcReg = coerceToScalar(SrcReg);
  if (!SrcReg)
    return UnableToLegalize;

  // Expand scalarizing unmerge as bitcast to integer and shift.
  LLT IntTy = MRI.getType(SrcReg);

  // Destination 0 is the low bits; no shift needed.
  MIRBuilder.buildTrunc(Dst0Reg, SrcReg);

  const unsigned DstSize = DstTy.getSizeInBits();
  unsigned Offset = DstSize;
  for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
    auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
    auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
    MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
  }

  MI.eraseFromParent();
  return Legalized;
}
5679 
/// Lower a vector extract or insert by writing the vector to a stack temporary
/// and reloading the element or vector.
///
/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
///  =>
///  %stack_temp = G_FRAME_INDEX
///  G_STORE %vec, %stack_temp
///  %idx = clamp(%idx, %vec.getNumElements())
///  %element_ptr = G_PTR_ADD %stack_temp, %idx
///  %dst = G_LOAD %element_ptr
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register InsertVal;
  // G_INSERT_VECTOR_ELT carries an extra value operand; its validity selects
  // the insert path below.
  if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
    InsertVal = MI.getOperand(2).getReg();

  // The index is the last operand for both opcodes.
  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();

  LLT VecTy = MRI.getType(SrcVec);
  LLT EltTy = VecTy.getElementType();
  if (!EltTy.isByteSized()) { // Not implemented.
    LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
    return UnableToLegalize;
  }

  unsigned EltBytes = EltTy.getSizeInBytes();
  Align VecAlign = getStackTemporaryAlignment(VecTy);
  Align EltAlign;

  // Spill the whole vector to a stack slot.
  MachinePointerInfo PtrInfo;
  auto StackTemp = createStackTemporary(TypeSize::Fixed(VecTy.getSizeInBytes()),
                                        VecAlign, PtrInfo);
  MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);

  // Get the pointer to the element, and be sure not to hit undefined behavior
  // if the index is out of bounds.
  Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);

  int64_t IdxVal;
  if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
    // Constant index: keep precise pointer info and derive the element's
    // alignment from its byte offset within the slot.
    int64_t Offset = IdxVal * EltBytes;
    PtrInfo = PtrInfo.getWithOffset(Offset);
    EltAlign = commonAlignment(VecAlign, Offset);
  } else {
    // We lose information with a variable offset.
    EltAlign = getStackTemporaryAlignment(EltTy);
    PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
  }

  if (InsertVal) {
    // Write the inserted element
    MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);

    // Reload the whole vector.
    // NOTE(review): this reuses the element's PtrInfo (offset or unknown)
    // rather than the slot's base pointer info — confirm this is intended.
    MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
  } else {
    MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
  }

  MI.eraseFromParent();
  return Legalized;
}
5744 
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
  // Lower G_SHUFFLE_VECTOR by extracting each selected element and rebuilding
  // the destination with G_BUILD_VECTOR.
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(1).getReg();
  Register Src1Reg = MI.getOperand(2).getReg();
  LLT Src0Ty = MRI.getType(Src0Reg);
  LLT DstTy = MRI.getType(DstReg);
  LLT IdxTy = LLT::scalar(32);

  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();

  if (DstTy.isScalar()) {
    if (Src0Ty.isVector())
      return UnableToLegalize;

    // This is just a SELECT.
    assert(Mask.size() == 1 && "Expected a single mask element");
    Register Val;
    // Negative or out-of-range mask entries produce undef.
    if (Mask[0] < 0 || Mask[0] > 1)
      Val = MIRBuilder.buildUndef(DstTy).getReg(0);
    else
      Val = Mask[0] == 0 ? Src0Reg : Src1Reg;
    MIRBuilder.buildCopy(DstReg, Val);
    MI.eraseFromParent();
    return Legalized;
  }

  Register Undef;
  SmallVector<Register, 32> BuildVec;
  LLT EltTy = DstTy.getElementType();

  for (int Idx : Mask) {
    if (Idx < 0) {
      // Undef lane; build a single undef value and reuse it.
      if (!Undef.isValid())
        Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
      BuildVec.push_back(Undef);
      continue;
    }

    if (Src0Ty.isScalar()) {
      // Scalar sources: index 0 selects the first source, anything else the
      // second.
      BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
    } else {
      // Indices >= NumElts refer to elements of the second source vector.
      int NumElts = Src0Ty.getNumElements();
      Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
      int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
      auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
      auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
      BuildVec.push_back(Extract.getReg(0));
    }
  }

  MIRBuilder.buildBuildVector(DstReg, BuildVec);
  MI.eraseFromParent();
  return Legalized;
}
5800 
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
  // Lower G_DYN_STACKALLOC by adjusting the stack pointer directly:
  // SP -= size, optionally align down, then copy the new SP to the result.
  const auto &MF = *MI.getMF();
  const auto &TFI = *MF.getSubtarget().getFrameLowering();
  // The expansion below assumes a downward-growing stack.
  if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
    return UnableToLegalize;

  Register Dst = MI.getOperand(0).getReg();
  Register AllocSize = MI.getOperand(1).getReg();
  Align Alignment = assumeAligned(MI.getOperand(2).getImm());

  LLT PtrTy = MRI.getType(Dst);
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  // Read the current SP and convert it to an integer for the arithmetic.
  Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
  auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
  SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);

  // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
  // have to generate an extra instruction to negate the alloc and then use
  // G_PTR_ADD to add the negative offset.
  auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
  if (Alignment > Align(1)) {
    // Align downwards by masking with -Alignment (all-ones above the low
    // log2(Alignment) bits).
    APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
    AlignMask.negate();
    auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
    Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
  }

  // Commit the new SP and return it as the allocated block's address.
  SPTmp = MIRBuilder.buildCast(PtrTy, Alloc);
  MIRBuilder.buildCopy(SPReg, SPTmp);
  MIRBuilder.buildCopy(Dst, SPTmp);

  MI.eraseFromParent();
  return Legalized;
}
5837 
5838 LegalizerHelper::LegalizeResult
5839 LegalizerHelper::lowerExtract(MachineInstr &MI) {
5840   Register Dst = MI.getOperand(0).getReg();
5841   Register Src = MI.getOperand(1).getReg();
5842   unsigned Offset = MI.getOperand(2).getImm();
5843 
5844   LLT DstTy = MRI.getType(Dst);
5845   LLT SrcTy = MRI.getType(Src);
5846 
5847   if (DstTy.isScalar() &&
5848       (SrcTy.isScalar() ||
5849        (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
5850     LLT SrcIntTy = SrcTy;
5851     if (!SrcTy.isScalar()) {
5852       SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
5853       Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0);
5854     }
5855 
5856     if (Offset == 0)
5857       MIRBuilder.buildTrunc(Dst, Src);
5858     else {
5859       auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
5860       auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt);
5861       MIRBuilder.buildTrunc(Dst, Shr);
5862     }
5863 
5864     MI.eraseFromParent();
5865     return Legalized;
5866   }
5867 
5868   return UnableToLegalize;
5869 }
5870 
LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
  // Lower G_INSERT as integer bit manipulation: clear the destination bit
  // range with a mask, then OR in the shifted, zero-extended insert source.
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register InsertSrc = MI.getOperand(2).getReg();
  uint64_t Offset = MI.getOperand(3).getImm();

  LLT DstTy = MRI.getType(Src);
  LLT InsertTy = MRI.getType(InsertSrc);

  // Only scalar (or whole-element) inserts are handled here.
  if (InsertTy.isVector() ||
      (DstTy.isVector() && DstTy.getElementType() != InsertTy))
    return UnableToLegalize;

  // Pointers in non-integral address spaces cannot round-trip through
  // integer casts.
  const DataLayout &DL = MIRBuilder.getDataLayout();
  if ((DstTy.isPointer() &&
       DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
      (InsertTy.isPointer() &&
       DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
    LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
    return UnableToLegalize;
  }

  LLT IntDstTy = DstTy;

  if (!DstTy.isScalar()) {
    IntDstTy = LLT::scalar(DstTy.getSizeInBits());
    Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
  }

  if (!InsertTy.isScalar()) {
    const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
    InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
  }

  // Zero-extend the insert source to the full width and move it into place.
  Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
  if (Offset != 0) {
    auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
    ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
  }

  // Mask selects every bit *outside* the inserted range (wrapping set from
  // the end of the range back around to its start).
  APInt MaskVal = APInt::getBitsSetWithWrap(
      DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);

  auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
  auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
  auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);

  MIRBuilder.buildCast(Dst, Or);
  MI.eraseFromParent();
  return Legalized;
}
5922 
5923 LegalizerHelper::LegalizeResult
5924 LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
5925   Register Dst0 = MI.getOperand(0).getReg();
5926   Register Dst1 = MI.getOperand(1).getReg();
5927   Register LHS = MI.getOperand(2).getReg();
5928   Register RHS = MI.getOperand(3).getReg();
5929   const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
5930 
5931   LLT Ty = MRI.getType(Dst0);
5932   LLT BoolTy = MRI.getType(Dst1);
5933 
5934   if (IsAdd)
5935     MIRBuilder.buildAdd(Dst0, LHS, RHS);
5936   else
5937     MIRBuilder.buildSub(Dst0, LHS, RHS);
5938 
5939   // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
5940 
5941   auto Zero = MIRBuilder.buildConstant(Ty, 0);
5942 
5943   // For an addition, the result should be less than one of the operands (LHS)
5944   // if and only if the other operand (RHS) is negative, otherwise there will
5945   // be overflow.
5946   // For a subtraction, the result should be less than one of the operands
5947   // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
5948   // otherwise there will be overflow.
5949   auto ResultLowerThanLHS =
5950       MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
5951   auto ConditionRHS = MIRBuilder.buildICmp(
5952       IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
5953 
5954   MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
5955   MI.eraseFromParent();
5956   return Legalized;
5957 }
5958 
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
  // Lower G_[SU]{ADD,SUB}SAT using min/max: clamp one operand into the range
  // that cannot overflow for the given LHS, then emit the plain add/sub.
  Register Res = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Res);
  bool IsSigned;
  bool IsAdd;
  unsigned BaseOp;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected addsat/subsat opcode");
  case TargetOpcode::G_UADDSAT:
    IsSigned = false;
    IsAdd = true;
    BaseOp = TargetOpcode::G_ADD;
    break;
  case TargetOpcode::G_SADDSAT:
    IsSigned = true;
    IsAdd = true;
    BaseOp = TargetOpcode::G_ADD;
    break;
  case TargetOpcode::G_USUBSAT:
    IsSigned = false;
    IsAdd = false;
    BaseOp = TargetOpcode::G_SUB;
    break;
  case TargetOpcode::G_SSUBSAT:
    IsSigned = true;
    IsAdd = false;
    BaseOp = TargetOpcode::G_SUB;
    break;
  }

  if (IsSigned) {
    // sadd.sat(a, b) ->
    //   hi = 0x7fffffff - smax(a, 0)
    //   lo = 0x80000000 - smin(a, 0)
    //   a + smin(smax(lo, b), hi)
    // ssub.sat(a, b) ->
    //   lo = smax(a, -1) - 0x7fffffff
    //   hi = smin(a, -1) - 0x80000000
    //   a - smin(smax(lo, b), hi)
    // TODO: AMDGPU can use a "median of 3" instruction here:
    //   a +/- med3(lo, b, hi)
    uint64_t NumBits = Ty.getScalarSizeInBits();
    auto MaxVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
    auto MinVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
    MachineInstrBuilder Hi, Lo;
    if (IsAdd) {
      auto Zero = MIRBuilder.buildConstant(Ty, 0);
      Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
      Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
    } else {
      auto NegOne = MIRBuilder.buildConstant(Ty, -1);
      Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
                               MaxVal);
      Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
                               MinVal);
    }
    // Clamp b into [lo, hi] before combining with a.
    auto RHSClamped =
        MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
    MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
  } else {
    // uadd.sat(a, b) -> a + umin(~a, b)
    // usub.sat(a, b) -> a - umin(a, b)
    Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
    auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
    MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
  }

  MI.eraseFromParent();
  return Legalized;
}
6035 
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
  // Lower G_[SU]{ADD,SUB}SAT via the corresponding overflow opcode: perform
  // the raw operation, then select the saturation value when overflow is
  // reported.
  Register Res = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Res);
  LLT BoolTy = Ty.changeElementSize(1);
  bool IsSigned;
  bool IsAdd;
  unsigned OverflowOp;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected addsat/subsat opcode");
  case TargetOpcode::G_UADDSAT:
    IsSigned = false;
    IsAdd = true;
    OverflowOp = TargetOpcode::G_UADDO;
    break;
  case TargetOpcode::G_SADDSAT:
    IsSigned = true;
    IsAdd = true;
    OverflowOp = TargetOpcode::G_SADDO;
    break;
  case TargetOpcode::G_USUBSAT:
    IsSigned = false;
    IsAdd = false;
    OverflowOp = TargetOpcode::G_USUBO;
    break;
  case TargetOpcode::G_SSUBSAT:
    IsSigned = true;
    IsAdd = false;
    OverflowOp = TargetOpcode::G_SSUBO;
    break;
  }

  auto OverflowRes =
      MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
  Register Tmp = OverflowRes.getReg(0);
  Register Ov = OverflowRes.getReg(1);
  MachineInstrBuilder Clamp;
  if (IsSigned) {
    // sadd.sat(a, b) ->
    //   {tmp, ov} = saddo(a, b)
    //   ov ? (tmp >>s 31) + 0x80000000 : r
    // ssub.sat(a, b) ->
    //   {tmp, ov} = ssubo(a, b)
    //   ov ? (tmp >>s 31) + 0x80000000 : r
    // (31 is the 32-bit example; the code uses NumBits - 1.) On overflow the
    // saturated value is derived from the wrapped result's sign bit.
    uint64_t NumBits = Ty.getScalarSizeInBits();
    auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
    auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
    auto MinVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
    Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
  } else {
    // uadd.sat(a, b) ->
    //   {tmp, ov} = uaddo(a, b)
    //   ov ? 0xffffffff : tmp
    // usub.sat(a, b) ->
    //   {tmp, ov} = usubo(a, b)
    //   ov ? 0 : tmp
    Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
  }
  MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);

  MI.eraseFromParent();
  return Legalized;
}
6103 
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShlSat(MachineInstr &MI) {
  // Lower G_[SU]SHLSAT: do the shift, shift back, and if the round trip does
  // not reproduce the LHS then bits were shifted out, so saturate.
  assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
          MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
         "Expected shlsat opcode!");
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
  Register Res = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Res);
  LLT BoolTy = Ty.changeElementSize(1);

  unsigned BW = Ty.getScalarSizeInBits();
  auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
  auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
                       : MIRBuilder.buildLShr(Ty, Result, RHS);

  MachineInstrBuilder SatVal;
  if (IsSigned) {
    // Signed saturation depends on the sign of the LHS: negative values
    // saturate to signed-min, non-negative to signed-max.
    auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
    auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
    auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
                                    MIRBuilder.buildConstant(Ty, 0));
    SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
  } else {
    // Unsigned overflow always saturates to all-ones.
    SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
  }
  auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
  MIRBuilder.buildSelect(Res, Ov, SatVal, Result);

  MI.eraseFromParent();
  return Legalized;
}
6137 
6138 LegalizerHelper::LegalizeResult
6139 LegalizerHelper::lowerBswap(MachineInstr &MI) {
6140   Register Dst = MI.getOperand(0).getReg();
6141   Register Src = MI.getOperand(1).getReg();
6142   const LLT Ty = MRI.getType(Src);
6143   unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
6144   unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
6145 
6146   // Swap most and least significant byte, set remaining bytes in Res to zero.
6147   auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
6148   auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
6149   auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
6150   auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
6151 
6152   // Set i-th high/low byte in Res to i-th low/high byte from Src.
6153   for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
6154     // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
6155     APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
6156     auto Mask = MIRBuilder.buildConstant(Ty, APMask);
6157     auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
6158     // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
6159     auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
6160     auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
6161     Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
6162     // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
6163     auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
6164     auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
6165     Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
6166   }
6167   Res.getInstr()->getOperand(0).setReg(Dst);
6168 
6169   MI.eraseFromParent();
6170   return Legalized;
6171 }
6172 
6173 //{ (Src & Mask) >> N } | { (Src << N) & Mask }
6174 static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
6175                                  MachineInstrBuilder Src, APInt Mask) {
6176   const LLT Ty = Dst.getLLTTy(*B.getMRI());
6177   MachineInstrBuilder C_N = B.buildConstant(Ty, N);
6178   MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
6179   auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
6180   auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
6181   return B.buildOr(Dst, LHS, RHS);
6182 }
6183 
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
  // Lower G_BITREVERSE as a byte swap followed by swapping ever-smaller
  // sub-blocks within each byte (nibbles, bit pairs, single bits).
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  const LLT Ty = MRI.getType(Src);
  unsigned Size = Ty.getSizeInBits();

  MachineInstrBuilder BSWAP =
      MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});

  // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
  //    [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
  // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
  MachineInstrBuilder Swap4 =
      SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));

  // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
  //    [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
  // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
  MachineInstrBuilder Swap2 =
      SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));

  // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
  //    [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
  // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
  SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));

  MI.eraseFromParent();
  return Legalized;
}
6214 
6215 LegalizerHelper::LegalizeResult
6216 LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
6217   MachineFunction &MF = MIRBuilder.getMF();
6218 
6219   bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
6220   int NameOpIdx = IsRead ? 1 : 0;
6221   int ValRegIndex = IsRead ? 0 : 1;
6222 
6223   Register ValReg = MI.getOperand(ValRegIndex).getReg();
6224   const LLT Ty = MRI.getType(ValReg);
6225   const MDString *RegStr = cast<MDString>(
6226     cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
6227 
6228   Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
6229   if (!PhysReg.isValid())
6230     return UnableToLegalize;
6231 
6232   if (IsRead)
6233     MIRBuilder.buildCopy(ValReg, PhysReg);
6234   else
6235     MIRBuilder.buildCopy(PhysReg, ValReg);
6236 
6237   MI.eraseFromParent();
6238   return Legalized;
6239 }
6240 
6241 LegalizerHelper::LegalizeResult
6242 LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
6243   bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
6244   unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
6245   Register Result = MI.getOperand(0).getReg();
6246   LLT OrigTy = MRI.getType(Result);
6247   auto SizeInBits = OrigTy.getScalarSizeInBits();
6248   LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
6249 
6250   auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
6251   auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
6252   auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
6253   unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
6254 
6255   auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
6256   auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
6257   MIRBuilder.buildTrunc(Result, Shifted);
6258 
6259   MI.eraseFromParent();
6260   return Legalized;
6261 }
6262 
LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
  // Implement vector G_SELECT in terms of XOR, AND, OR.
  Register DstReg = MI.getOperand(0).getReg();
  Register MaskReg = MI.getOperand(1).getReg();
  Register Op1Reg = MI.getOperand(2).getReg();
  Register Op2Reg = MI.getOperand(3).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT MaskTy = MRI.getType(MaskReg);
  LLT Op1Ty = MRI.getType(Op1Reg);
  if (!DstTy.isVector())
    return UnableToLegalize;

  // Vector selects can have a scalar predicate. If so, splat into a vector and
  // finish for later legalization attempts to try again.
  if (MaskTy.isScalar()) {
    Register MaskElt = MaskReg;
    // Sign-extend first so the splatted lanes become all-ones / all-zeros.
    if (MaskTy.getSizeInBits() < DstTy.getScalarSizeInBits())
      MaskElt = MIRBuilder.buildSExt(DstTy.getElementType(), MaskElt).getReg(0);
    // Generate a vector splat idiom to be pattern matched later.
    auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
    Observer.changingInstr(MI);
    MI.getOperand(1).setReg(ShufSplat.getReg(0));
    Observer.changedInstr(MI);
    return Legalized;
  }

  // The bitwise expansion below needs the mask to cover the operand width.
  if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits()) {
    return UnableToLegalize;
  }

  // dst = (op1 & mask) | (op2 & ~mask); presumably mask lanes are all-ones or
  // all-zeros (as the sign-extended splat above produces) — TODO confirm for
  // other mask producers.
  auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
  auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
  auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
  MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
  MI.eraseFromParent();
  return Legalized;
}
6300