1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file implements the LegalizerHelper class to legalize
10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
11 /// primary legalization.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
16 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
17 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
18 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/CodeGen/TargetInstrInfo.h"
21 #include "llvm/CodeGen/TargetLowering.h"
22 #include "llvm/CodeGen/TargetSubtargetInfo.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
26 
27 #define DEBUG_TYPE "legalizer"
28 
29 using namespace llvm;
30 using namespace LegalizeActions;
31 
32 LegalizerHelper::LegalizerHelper(MachineFunction &MF,
33                                  GISelChangeObserver &Observer,
34                                  MachineIRBuilder &Builder)
35     : MIRBuilder(Builder), MRI(MF.getRegInfo()),
36       LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
37   MIRBuilder.setMF(MF);
38   MIRBuilder.setChangeObserver(Observer);
39 }
40 
41 LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
42                                  GISelChangeObserver &Observer,
43                                  MachineIRBuilder &B)
44     : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
45   MIRBuilder.setMF(MF);
46   MIRBuilder.setChangeObserver(Observer);
47 }
48 LegalizerHelper::LegalizeResult
49 LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
50   LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));
51 
52   auto Step = LI.getAction(MI, MRI);
53   switch (Step.Action) {
54   case Legal:
55     LLVM_DEBUG(dbgs() << ".. Already legal\n");
56     return AlreadyLegal;
57   case Libcall:
58     LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
59     return libcall(MI);
60   case NarrowScalar:
61     LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
62     return narrowScalar(MI, Step.TypeIdx, Step.NewType);
63   case WidenScalar:
64     LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
65     return widenScalar(MI, Step.TypeIdx, Step.NewType);
66   case Lower:
67     LLVM_DEBUG(dbgs() << ".. Lower\n");
68     return lower(MI, Step.TypeIdx, Step.NewType);
69   case FewerElements:
70     LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
71     return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
72   case Custom:
73     LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
74     return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
75                                                             : UnableToLegalize;
76   default:
77     LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
78     return UnableToLegalize;
79   }
80 }
81 
82 void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts,
83                                    SmallVectorImpl<unsigned> &VRegs) {
84   for (int i = 0; i < NumParts; ++i)
85     VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
86   MIRBuilder.buildUnmerge(VRegs, Reg);
87 }
88 
89 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
90   switch (Opcode) {
91   case TargetOpcode::G_SDIV:
92     assert((Size == 32 || Size == 64) && "Unsupported size");
93     return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32;
94   case TargetOpcode::G_UDIV:
95     assert((Size == 32 || Size == 64) && "Unsupported size");
96     return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32;
97   case TargetOpcode::G_SREM:
98     assert((Size == 32 || Size == 64) && "Unsupported size");
99     return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
100   case TargetOpcode::G_UREM:
101     assert((Size == 32 || Size == 64) && "Unsupported size");
102     return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32;
103   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
104     assert(Size == 32 && "Unsupported size");
105     return RTLIB::CTLZ_I32;
106   case TargetOpcode::G_FADD:
107     assert((Size == 32 || Size == 64) && "Unsupported size");
108     return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
109   case TargetOpcode::G_FSUB:
110     assert((Size == 32 || Size == 64) && "Unsupported size");
111     return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
112   case TargetOpcode::G_FMUL:
113     assert((Size == 32 || Size == 64) && "Unsupported size");
114     return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
115   case TargetOpcode::G_FDIV:
116     assert((Size == 32 || Size == 64) && "Unsupported size");
117     return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
118   case TargetOpcode::G_FREM:
119     return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
120   case TargetOpcode::G_FPOW:
121     return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
122   case TargetOpcode::G_FMA:
123     assert((Size == 32 || Size == 64) && "Unsupported size");
124     return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
125   }
126   llvm_unreachable("Unknown libcall function");
127 }
128 
129 LegalizerHelper::LegalizeResult
130 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
131                     const CallLowering::ArgInfo &Result,
132                     ArrayRef<CallLowering::ArgInfo> Args) {
133   auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
134   auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
135   const char *Name = TLI.getLibcallName(Libcall);
136 
137   MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
138   if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall),
139                      MachineOperand::CreateES(Name), Result, Args))
140     return LegalizerHelper::UnableToLegalize;
141 
142   return LegalizerHelper::Legalized;
143 }
144 
145 // Useful for libcalls where all operands have the same type.
146 static LegalizerHelper::LegalizeResult
147 simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
148               Type *OpType) {
149   auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
150 
151   SmallVector<CallLowering::ArgInfo, 3> Args;
152   for (unsigned i = 1; i < MI.getNumOperands(); i++)
153     Args.push_back({MI.getOperand(i).getReg(), OpType});
154   return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
155                        Args);
156 }
157 
158 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
159                                        Type *FromType) {
160   auto ToMVT = MVT::getVT(ToType);
161   auto FromMVT = MVT::getVT(FromType);
162 
163   switch (Opcode) {
164   case TargetOpcode::G_FPEXT:
165     return RTLIB::getFPEXT(FromMVT, ToMVT);
166   case TargetOpcode::G_FPTRUNC:
167     return RTLIB::getFPROUND(FromMVT, ToMVT);
168   case TargetOpcode::G_FPTOSI:
169     return RTLIB::getFPTOSINT(FromMVT, ToMVT);
170   case TargetOpcode::G_FPTOUI:
171     return RTLIB::getFPTOUINT(FromMVT, ToMVT);
172   case TargetOpcode::G_SITOFP:
173     return RTLIB::getSINTTOFP(FromMVT, ToMVT);
174   case TargetOpcode::G_UITOFP:
175     return RTLIB::getUINTTOFP(FromMVT, ToMVT);
176   }
177   llvm_unreachable("Unsupported libcall function");
178 }
179 
180 static LegalizerHelper::LegalizeResult
181 conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
182                   Type *FromType) {
183   RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
184   return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
185                        {{MI.getOperand(1).getReg(), FromType}});
186 }
187 
188 LegalizerHelper::LegalizeResult
189 LegalizerHelper::libcall(MachineInstr &MI) {
190   LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
191   unsigned Size = LLTy.getSizeInBits();
192   auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
193 
194   MIRBuilder.setInstr(MI);
195 
196   switch (MI.getOpcode()) {
197   default:
198     return UnableToLegalize;
199   case TargetOpcode::G_SDIV:
200   case TargetOpcode::G_UDIV:
201   case TargetOpcode::G_SREM:
202   case TargetOpcode::G_UREM:
203   case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
204     Type *HLTy = IntegerType::get(Ctx, Size);
205     auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
206     if (Status != Legalized)
207       return Status;
208     break;
209   }
210   case TargetOpcode::G_FADD:
211   case TargetOpcode::G_FSUB:
212   case TargetOpcode::G_FMUL:
213   case TargetOpcode::G_FDIV:
214   case TargetOpcode::G_FMA:
215   case TargetOpcode::G_FPOW:
216   case TargetOpcode::G_FREM: {
217     Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
218     auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
219     if (Status != Legalized)
220       return Status;
221     break;
222   }
223   case TargetOpcode::G_FPEXT: {
224     // FIXME: Support other floating point types (half, fp128 etc)
225     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
226     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
227     if (ToSize != 64 || FromSize != 32)
228       return UnableToLegalize;
229     LegalizeResult Status = conversionLibcall(
230         MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx));
231     if (Status != Legalized)
232       return Status;
233     break;
234   }
235   case TargetOpcode::G_FPTRUNC: {
236     // FIXME: Support other floating point types (half, fp128 etc)
237     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
238     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
239     if (ToSize != 32 || FromSize != 64)
240       return UnableToLegalize;
241     LegalizeResult Status = conversionLibcall(
242         MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx));
243     if (Status != Legalized)
244       return Status;
245     break;
246   }
247   case TargetOpcode::G_FPTOSI:
248   case TargetOpcode::G_FPTOUI: {
249     // FIXME: Support other types
250     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
251     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
252     if (ToSize != 32 || (FromSize != 32 && FromSize != 64))
253       return UnableToLegalize;
254     LegalizeResult Status = conversionLibcall(
255         MI, MIRBuilder, Type::getInt32Ty(Ctx),
256         FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
257     if (Status != Legalized)
258       return Status;
259     break;
260   }
261   case TargetOpcode::G_SITOFP:
262   case TargetOpcode::G_UITOFP: {
263     // FIXME: Support other types
264     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
265     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
266     if (FromSize != 32 || (ToSize != 32 && ToSize != 64))
267       return UnableToLegalize;
268     LegalizeResult Status = conversionLibcall(
269         MI, MIRBuilder,
270         ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
271         Type::getInt32Ty(Ctx));
272     if (Status != Legalized)
273       return Status;
274     break;
275   }
276   }
277 
278   MI.eraseFromParent();
279   return Legalized;
280 }
281 
282 LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
283                                                               unsigned TypeIdx,
284                                                               LLT NarrowTy) {
285   // FIXME: Don't know how to handle secondary types yet.
286   if (TypeIdx != 0 && MI.getOpcode() != TargetOpcode::G_EXTRACT)
287     return UnableToLegalize;
288 
289   MIRBuilder.setInstr(MI);
290 
291   uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
292   uint64_t NarrowSize = NarrowTy.getSizeInBits();
293 
294   switch (MI.getOpcode()) {
295   default:
296     return UnableToLegalize;
297   case TargetOpcode::G_IMPLICIT_DEF: {
298     // FIXME: add support for when SizeOp0 isn't an exact multiple of
299     // NarrowSize.
300     if (SizeOp0 % NarrowSize != 0)
301       return UnableToLegalize;
302     int NumParts = SizeOp0 / NarrowSize;
303 
304     SmallVector<unsigned, 2> DstRegs;
305     for (int i = 0; i < NumParts; ++i)
306       DstRegs.push_back(
307           MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());
308 
309     unsigned DstReg = MI.getOperand(0).getReg();
310     if(MRI.getType(DstReg).isVector())
311       MIRBuilder.buildBuildVector(DstReg, DstRegs);
312     else
313       MIRBuilder.buildMerge(DstReg, DstRegs);
314     MI.eraseFromParent();
315     return Legalized;
316   }
317   case TargetOpcode::G_ADD: {
318     // FIXME: add support for when SizeOp0 isn't an exact multiple of
319     // NarrowSize.
320     if (SizeOp0 % NarrowSize != 0)
321       return UnableToLegalize;
322     // Expand in terms of carry-setting/consuming G_ADDE instructions.
323     int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
324 
325     SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
326     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
327     extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
328 
329     unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
330     MIRBuilder.buildConstant(CarryIn, 0);
331 
332     for (int i = 0; i < NumParts; ++i) {
333       unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
334       unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
335 
336       MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
337                             Src2Regs[i], CarryIn);
338 
339       DstRegs.push_back(DstReg);
340       CarryIn = CarryOut;
341     }
342     unsigned DstReg = MI.getOperand(0).getReg();
343     if(MRI.getType(DstReg).isVector())
344       MIRBuilder.buildBuildVector(DstReg, DstRegs);
345     else
346       MIRBuilder.buildMerge(DstReg, DstRegs);
347     MI.eraseFromParent();
348     return Legalized;
349   }
350   case TargetOpcode::G_EXTRACT: {
351     if (TypeIdx != 1)
352       return UnableToLegalize;
353 
354     int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
355     // FIXME: add support for when SizeOp1 isn't an exact multiple of
356     // NarrowSize.
357     if (SizeOp1 % NarrowSize != 0)
358       return UnableToLegalize;
359     int NumParts = SizeOp1 / NarrowSize;
360 
361     SmallVector<unsigned, 2> SrcRegs, DstRegs;
362     SmallVector<uint64_t, 2> Indexes;
363     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
364 
365     unsigned OpReg = MI.getOperand(0).getReg();
366     uint64_t OpStart = MI.getOperand(2).getImm();
367     uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
368     for (int i = 0; i < NumParts; ++i) {
369       unsigned SrcStart = i * NarrowSize;
370 
371       if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
372         // No part of the extract uses this subregister, ignore it.
373         continue;
374       } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
375         // The entire subregister is extracted, forward the value.
376         DstRegs.push_back(SrcRegs[i]);
377         continue;
378       }
379 
380       // OpSegStart is where this destination segment would start in OpReg if it
381       // extended infinitely in both directions.
382       int64_t ExtractOffset;
383       uint64_t SegSize;
384       if (OpStart < SrcStart) {
385         ExtractOffset = 0;
386         SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
387       } else {
388         ExtractOffset = OpStart - SrcStart;
389         SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
390       }
391 
392       unsigned SegReg = SrcRegs[i];
393       if (ExtractOffset != 0 || SegSize != NarrowSize) {
394         // A genuine extract is needed.
395         SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
396         MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
397       }
398 
399       DstRegs.push_back(SegReg);
400     }
401 
402     unsigned DstReg = MI.getOperand(0).getReg();
403     if(MRI.getType(DstReg).isVector())
404       MIRBuilder.buildBuildVector(DstReg, DstRegs);
405     else
406       MIRBuilder.buildMerge(DstReg, DstRegs);
407     MI.eraseFromParent();
408     return Legalized;
409   }
410   case TargetOpcode::G_INSERT: {
411     // FIXME: add support for when SizeOp0 isn't an exact multiple of
412     // NarrowSize.
413     if (SizeOp0 % NarrowSize != 0)
414       return UnableToLegalize;
415 
416     int NumParts = SizeOp0 / NarrowSize;
417 
418     SmallVector<unsigned, 2> SrcRegs, DstRegs;
419     SmallVector<uint64_t, 2> Indexes;
420     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
421 
422     unsigned OpReg = MI.getOperand(2).getReg();
423     uint64_t OpStart = MI.getOperand(3).getImm();
424     uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
425     for (int i = 0; i < NumParts; ++i) {
426       unsigned DstStart = i * NarrowSize;
427 
428       if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
429         // No part of the insert affects this subregister, forward the original.
430         DstRegs.push_back(SrcRegs[i]);
431         continue;
432       } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
433         // The entire subregister is defined by this insert, forward the new
434         // value.
435         DstRegs.push_back(OpReg);
436         continue;
437       }
438 
439       // OpSegStart is where this destination segment would start in OpReg if it
440       // extended infinitely in both directions.
441       int64_t ExtractOffset, InsertOffset;
442       uint64_t SegSize;
443       if (OpStart < DstStart) {
444         InsertOffset = 0;
445         ExtractOffset = DstStart - OpStart;
446         SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
447       } else {
448         InsertOffset = OpStart - DstStart;
449         ExtractOffset = 0;
450         SegSize =
451             std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
452       }
453 
454       unsigned SegReg = OpReg;
455       if (ExtractOffset != 0 || SegSize != OpSize) {
456         // A genuine extract is needed.
457         SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
458         MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
459       }
460 
461       unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
462       MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
463       DstRegs.push_back(DstReg);
464     }
465 
466     assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
467     unsigned DstReg = MI.getOperand(0).getReg();
468     if(MRI.getType(DstReg).isVector())
469       MIRBuilder.buildBuildVector(DstReg, DstRegs);
470     else
471       MIRBuilder.buildMerge(DstReg, DstRegs);
472     MI.eraseFromParent();
473     return Legalized;
474   }
475   case TargetOpcode::G_LOAD: {
476     // FIXME: add support for when SizeOp0 isn't an exact multiple of
477     // NarrowSize.
478     if (SizeOp0 % NarrowSize != 0)
479       return UnableToLegalize;
480 
481     const auto &MMO = **MI.memoperands_begin();
482     // This implementation doesn't work for atomics. Give up instead of doing
483     // something invalid.
484     if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
485         MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
486       return UnableToLegalize;
487 
488     int NumParts = SizeOp0 / NarrowSize;
489     LLT OffsetTy = LLT::scalar(
490         MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());
491 
492     SmallVector<unsigned, 2> DstRegs;
493     for (int i = 0; i < NumParts; ++i) {
494       unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
495       unsigned SrcReg = 0;
496       unsigned Adjustment = i * NarrowSize / 8;
497       unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);
498 
499       MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
500           MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
501           NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
502           MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());
503 
504       MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy,
505                                 Adjustment);
506 
507       MIRBuilder.buildLoad(DstReg, SrcReg, *SplitMMO);
508 
509       DstRegs.push_back(DstReg);
510     }
511     unsigned DstReg = MI.getOperand(0).getReg();
512     if(MRI.getType(DstReg).isVector())
513       MIRBuilder.buildBuildVector(DstReg, DstRegs);
514     else
515       MIRBuilder.buildMerge(DstReg, DstRegs);
516     MI.eraseFromParent();
517     return Legalized;
518   }
519   case TargetOpcode::G_STORE: {
520     // FIXME: add support for when SizeOp0 isn't an exact multiple of
521     // NarrowSize.
522     if (SizeOp0 % NarrowSize != 0)
523       return UnableToLegalize;
524 
525     const auto &MMO = **MI.memoperands_begin();
526     // This implementation doesn't work for atomics. Give up instead of doing
527     // something invalid.
528     if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
529         MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
530       return UnableToLegalize;
531 
532     int NumParts = SizeOp0 / NarrowSize;
533     LLT OffsetTy = LLT::scalar(
534         MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());
535 
536     SmallVector<unsigned, 2> SrcRegs;
537     extractParts(MI.getOperand(0).getReg(), NarrowTy, NumParts, SrcRegs);
538 
539     for (int i = 0; i < NumParts; ++i) {
540       unsigned DstReg = 0;
541       unsigned Adjustment = i * NarrowSize / 8;
542       unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);
543 
544       MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
545           MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
546           NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
547           MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());
548 
549       MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy,
550                                 Adjustment);
551 
552       MIRBuilder.buildStore(SrcRegs[i], DstReg, *SplitMMO);
553     }
554     MI.eraseFromParent();
555     return Legalized;
556   }
557   case TargetOpcode::G_CONSTANT: {
558     // FIXME: add support for when SizeOp0 isn't an exact multiple of
559     // NarrowSize.
560     if (SizeOp0 % NarrowSize != 0)
561       return UnableToLegalize;
562     int NumParts = SizeOp0 / NarrowSize;
563     const APInt &Cst = MI.getOperand(1).getCImm()->getValue();
564     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
565 
566     SmallVector<unsigned, 2> DstRegs;
567     for (int i = 0; i < NumParts; ++i) {
568       unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
569       ConstantInt *CI =
570           ConstantInt::get(Ctx, Cst.lshr(NarrowSize * i).trunc(NarrowSize));
571       MIRBuilder.buildConstant(DstReg, *CI);
572       DstRegs.push_back(DstReg);
573     }
574     unsigned DstReg = MI.getOperand(0).getReg();
575     if(MRI.getType(DstReg).isVector())
576       MIRBuilder.buildBuildVector(DstReg, DstRegs);
577     else
578       MIRBuilder.buildMerge(DstReg, DstRegs);
579     MI.eraseFromParent();
580     return Legalized;
581   }
582   case TargetOpcode::G_AND:
583   case TargetOpcode::G_OR:
584   case TargetOpcode::G_XOR: {
585     // Legalize bitwise operation:
586     // A = BinOp<Ty> B, C
587     // into:
588     // B1, ..., BN = G_UNMERGE_VALUES B
589     // C1, ..., CN = G_UNMERGE_VALUES C
590     // A1 = BinOp<Ty/N> B1, C2
591     // ...
592     // AN = BinOp<Ty/N> BN, CN
593     // A = G_MERGE_VALUES A1, ..., AN
594 
595     // FIXME: add support for when SizeOp0 isn't an exact multiple of
596     // NarrowSize.
597     if (SizeOp0 % NarrowSize != 0)
598       return UnableToLegalize;
599     int NumParts = SizeOp0 / NarrowSize;
600 
601     // List the registers where the destination will be scattered.
602     SmallVector<unsigned, 2> DstRegs;
603     // List the registers where the first argument will be split.
604     SmallVector<unsigned, 2> SrcsReg1;
605     // List the registers where the second argument will be split.
606     SmallVector<unsigned, 2> SrcsReg2;
607     // Create all the temporary registers.
608     for (int i = 0; i < NumParts; ++i) {
609       unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
610       unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy);
611       unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy);
612 
613       DstRegs.push_back(DstReg);
614       SrcsReg1.push_back(SrcReg1);
615       SrcsReg2.push_back(SrcReg2);
616     }
617     // Explode the big arguments into smaller chunks.
618     MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg());
619     MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg());
620 
621     // Do the operation on each small part.
622     for (int i = 0; i < NumParts; ++i)
623       MIRBuilder.buildInstr(MI.getOpcode(), {DstRegs[i]},
624                             {SrcsReg1[i], SrcsReg2[i]});
625 
626     // Gather the destination registers into the final destination.
627     unsigned DstReg = MI.getOperand(0).getReg();
628     if(MRI.getType(DstReg).isVector())
629       MIRBuilder.buildBuildVector(DstReg, DstRegs);
630     else
631       MIRBuilder.buildMerge(DstReg, DstRegs);
632     MI.eraseFromParent();
633     return Legalized;
634   }
635   }
636 }
637 
638 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
639                                      unsigned OpIdx, unsigned ExtOpcode) {
640   MachineOperand &MO = MI.getOperand(OpIdx);
641   auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()});
642   MO.setReg(ExtB->getOperand(0).getReg());
643 }
644 
645 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
646                                      unsigned OpIdx, unsigned TruncOpcode) {
647   MachineOperand &MO = MI.getOperand(OpIdx);
648   unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
649   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
650   MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt});
651   MO.setReg(DstExt);
652 }
653 
654 LegalizerHelper::LegalizeResult
655 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
656   MIRBuilder.setInstr(MI);
657 
658   switch (MI.getOpcode()) {
659   default:
660     return UnableToLegalize;
661   case TargetOpcode::G_UADDO:
662   case TargetOpcode::G_USUBO: {
663     if (TypeIdx == 1)
664       return UnableToLegalize; // TODO
665     auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
666                                          {MI.getOperand(2).getReg()});
667     auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
668                                          {MI.getOperand(3).getReg()});
669     unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
670                           ? TargetOpcode::G_ADD
671                           : TargetOpcode::G_SUB;
672     // Do the arithmetic in the larger type.
673     auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
674     LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
675     APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits());
676     auto AndOp = MIRBuilder.buildInstr(
677         TargetOpcode::G_AND, {WideTy},
678         {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
679     // There is no overflow if the AndOp is the same as NewOp.
680     MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp,
681                          AndOp);
682     // Now trunc the NewOp to the original result.
683     MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
684     MI.eraseFromParent();
685     return Legalized;
686   }
687   case TargetOpcode::G_CTTZ:
688   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
689   case TargetOpcode::G_CTLZ:
690   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
691   case TargetOpcode::G_CTPOP: {
692     // First ZEXT the input.
693     auto MIBSrc = MIRBuilder.buildZExt(WideTy, MI.getOperand(1).getReg());
694     LLT CurTy = MRI.getType(MI.getOperand(0).getReg());
695     if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
696       // The count is the same in the larger type except if the original
697       // value was zero.  This can be handled by setting the bit just off
698       // the top of the original type.
699       auto TopBit =
700           APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
701       MIBSrc = MIRBuilder.buildInstr(
702           TargetOpcode::G_OR, {WideTy},
703           {MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit.getSExtValue())});
704     }
705     // Perform the operation at the larger size.
706     auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
707     // This is already the correct result for CTPOP and CTTZs
708     if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
709         MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
710       // The correct result is NewOp - (Difference in widety and current ty).
711       unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
712       MIBNewOp = MIRBuilder.buildInstr(
713           TargetOpcode::G_SUB, {WideTy},
714           {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
715     }
716     auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
717     // Make the original instruction a trunc now, and update its source.
718     Observer.changingInstr(MI);
719     MI.setDesc(TII.get(TargetOpcode::G_TRUNC));
720     MI.getOperand(1).setReg(MIBNewOp->getOperand(0).getReg());
721     Observer.changedInstr(MI);
722     return Legalized;
723   }
724 
725   case TargetOpcode::G_ADD:
726   case TargetOpcode::G_AND:
727   case TargetOpcode::G_MUL:
728   case TargetOpcode::G_OR:
729   case TargetOpcode::G_XOR:
730   case TargetOpcode::G_SUB:
731     // Perform operation at larger width (any extension is fine here, high bits
732     // don't affect the result) and then truncate the result back to the
733     // original type.
734     Observer.changingInstr(MI);
735     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
736     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
737     widenScalarDst(MI, WideTy);
738     Observer.changedInstr(MI);
739     return Legalized;
740 
741   case TargetOpcode::G_SHL:
742     Observer.changingInstr(MI);
743     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
744     // The "number of bits to shift" operand must preserve its value as an
745     // unsigned integer:
746     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
747     widenScalarDst(MI, WideTy);
748     Observer.changedInstr(MI);
749     return Legalized;
750 
751   case TargetOpcode::G_SDIV:
752   case TargetOpcode::G_SREM:
753     Observer.changingInstr(MI);
754     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
755     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
756     widenScalarDst(MI, WideTy);
757     Observer.changedInstr(MI);
758     return Legalized;
759 
760   case TargetOpcode::G_ASHR:
761     Observer.changingInstr(MI);
762     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
763     // The "number of bits to shift" operand must preserve its value as an
764     // unsigned integer:
765     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
766     widenScalarDst(MI, WideTy);
767     Observer.changedInstr(MI);
768     return Legalized;
769 
770   case TargetOpcode::G_UDIV:
771   case TargetOpcode::G_UREM:
772   case TargetOpcode::G_LSHR:
773     Observer.changingInstr(MI);
774     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
775     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
776     widenScalarDst(MI, WideTy);
777     Observer.changedInstr(MI);
778     return Legalized;
779 
780   case TargetOpcode::G_SELECT:
781     Observer.changingInstr(MI);
782     if (TypeIdx == 0) {
783       // Perform operation at larger width (any extension is fine here, high
784       // bits don't affect the result) and then truncate the result back to the
785       // original type.
786       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
787       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
788       widenScalarDst(MI, WideTy);
789     } else {
790       // Explicit extension is required here since high bits affect the result.
791       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
792     }
793     Observer.changedInstr(MI);
794     return Legalized;
795 
796   case TargetOpcode::G_FPTOSI:
797   case TargetOpcode::G_FPTOUI:
798     if (TypeIdx != 0)
799       return UnableToLegalize;
800     Observer.changingInstr(MI);
801     widenScalarDst(MI, WideTy);
802     Observer.changedInstr(MI);
803     return Legalized;
804 
805   case TargetOpcode::G_SITOFP:
806     if (TypeIdx != 1)
807       return UnableToLegalize;
808     Observer.changingInstr(MI);
809     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
810     Observer.changedInstr(MI);
811     return Legalized;
812 
813   case TargetOpcode::G_UITOFP:
814     if (TypeIdx != 1)
815       return UnableToLegalize;
816     Observer.changingInstr(MI);
817     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
818     Observer.changedInstr(MI);
819     return Legalized;
820 
821   case TargetOpcode::G_INSERT:
822     if (TypeIdx != 0)
823       return UnableToLegalize;
824     Observer.changingInstr(MI);
825     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
826     widenScalarDst(MI, WideTy);
827     Observer.changedInstr(MI);
828     return Legalized;
829 
830   case TargetOpcode::G_LOAD:
831     // For some types like i24, we might try to widen to i32. To properly handle
832     // this we should be using a dedicated extending load, until then avoid
833     // trying to legalize.
834     if (alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) !=
835         WideTy.getSizeInBits())
836       return UnableToLegalize;
837     LLVM_FALLTHROUGH;
838   case TargetOpcode::G_SEXTLOAD:
839   case TargetOpcode::G_ZEXTLOAD:
840     Observer.changingInstr(MI);
841     widenScalarDst(MI, WideTy);
842     Observer.changedInstr(MI);
843     return Legalized;
844 
845   case TargetOpcode::G_STORE: {
846     if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(1) ||
847         WideTy != LLT::scalar(8))
848       return UnableToLegalize;
849 
850     Observer.changingInstr(MI);
851     widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ZEXT);
852     Observer.changedInstr(MI);
853     return Legalized;
854   }
855   case TargetOpcode::G_CONSTANT: {
856     MachineOperand &SrcMO = MI.getOperand(1);
857     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
858     const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
859     Observer.changingInstr(MI);
860     SrcMO.setCImm(ConstantInt::get(Ctx, Val));
861 
862     widenScalarDst(MI, WideTy);
863     Observer.changedInstr(MI);
864     return Legalized;
865   }
866   case TargetOpcode::G_FCONSTANT: {
867     MachineOperand &SrcMO = MI.getOperand(1);
868     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
869     APFloat Val = SrcMO.getFPImm()->getValueAPF();
870     bool LosesInfo;
871     switch (WideTy.getSizeInBits()) {
872     case 32:
873       Val.convert(APFloat::IEEEsingle(), APFloat::rmTowardZero, &LosesInfo);
874       break;
875     case 64:
876       Val.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &LosesInfo);
877       break;
878     default:
879       llvm_unreachable("Unhandled fp widen type");
880     }
881     Observer.changingInstr(MI);
882     SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
883 
884     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
885     Observer.changedInstr(MI);
886     return Legalized;
887   }
888   case TargetOpcode::G_IMPLICIT_DEF: {
889     Observer.changingInstr(MI);
890     widenScalarDst(MI, WideTy);
891     Observer.changedInstr(MI);
892     return Legalized;
893   }
894   case TargetOpcode::G_BRCOND:
895     Observer.changingInstr(MI);
896     widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
897     Observer.changedInstr(MI);
898     return Legalized;
899 
900   case TargetOpcode::G_FCMP:
901     Observer.changingInstr(MI);
902     if (TypeIdx == 0)
903       widenScalarDst(MI, WideTy);
904     else {
905       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
906       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
907     }
908     Observer.changedInstr(MI);
909     return Legalized;
910 
911   case TargetOpcode::G_ICMP:
912     Observer.changingInstr(MI);
913     if (TypeIdx == 0)
914       widenScalarDst(MI, WideTy);
915     else {
916       unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
917                                MI.getOperand(1).getPredicate()))
918                                ? TargetOpcode::G_SEXT
919                                : TargetOpcode::G_ZEXT;
920       widenScalarSrc(MI, WideTy, 2, ExtOpcode);
921       widenScalarSrc(MI, WideTy, 3, ExtOpcode);
922     }
923     Observer.changedInstr(MI);
924     return Legalized;
925 
926   case TargetOpcode::G_GEP:
927     assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
928     Observer.changingInstr(MI);
929     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
930     Observer.changedInstr(MI);
931     return Legalized;
932 
933   case TargetOpcode::G_PHI: {
934     assert(TypeIdx == 0 && "Expecting only Idx 0");
935 
936     Observer.changingInstr(MI);
937     for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
938       MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
939       MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
940       widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
941     }
942 
943     MachineBasicBlock &MBB = *MI.getParent();
944     MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
945     widenScalarDst(MI, WideTy);
946     Observer.changedInstr(MI);
947     return Legalized;
948   }
949   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
950     if (TypeIdx != 2)
951       return UnableToLegalize;
952     Observer.changingInstr(MI);
953     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
954     Observer.changedInstr(MI);
955     return Legalized;
956   case TargetOpcode::G_FADD:
957   case TargetOpcode::G_FMUL:
958   case TargetOpcode::G_FSUB:
959   case TargetOpcode::G_FMA:
960   case TargetOpcode::G_FNEG:
961   case TargetOpcode::G_FABS:
962   case TargetOpcode::G_FDIV:
963   case TargetOpcode::G_FREM:
964   case TargetOpcode::G_FCEIL:
965     assert(TypeIdx == 0);
966     Observer.changingInstr(MI);
967 
968     for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
969       widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
970 
971     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
972     Observer.changedInstr(MI);
973     return Legalized;
974   }
975 }
976 
977 LegalizerHelper::LegalizeResult
978 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
979   using namespace TargetOpcode;
980   MIRBuilder.setInstr(MI);
981 
982   switch(MI.getOpcode()) {
983   default:
984     return UnableToLegalize;
985   case TargetOpcode::G_SREM:
986   case TargetOpcode::G_UREM: {
987     unsigned QuotReg = MRI.createGenericVirtualRegister(Ty);
988     MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
989         .addDef(QuotReg)
990         .addUse(MI.getOperand(1).getReg())
991         .addUse(MI.getOperand(2).getReg());
992 
993     unsigned ProdReg = MRI.createGenericVirtualRegister(Ty);
994     MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
995     MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
996                         ProdReg);
997     MI.eraseFromParent();
998     return Legalized;
999   }
1000   case TargetOpcode::G_SMULO:
1001   case TargetOpcode::G_UMULO: {
1002     // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
1003     // result.
1004     unsigned Res = MI.getOperand(0).getReg();
1005     unsigned Overflow = MI.getOperand(1).getReg();
1006     unsigned LHS = MI.getOperand(2).getReg();
1007     unsigned RHS = MI.getOperand(3).getReg();
1008 
1009     MIRBuilder.buildMul(Res, LHS, RHS);
1010 
1011     unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
1012                           ? TargetOpcode::G_SMULH
1013                           : TargetOpcode::G_UMULH;
1014 
1015     unsigned HiPart = MRI.createGenericVirtualRegister(Ty);
1016     MIRBuilder.buildInstr(Opcode)
1017       .addDef(HiPart)
1018       .addUse(LHS)
1019       .addUse(RHS);
1020 
1021     unsigned Zero = MRI.createGenericVirtualRegister(Ty);
1022     MIRBuilder.buildConstant(Zero, 0);
1023 
1024     // For *signed* multiply, overflow is detected by checking:
1025     // (hi != (lo >> bitwidth-1))
1026     if (Opcode == TargetOpcode::G_SMULH) {
1027       unsigned Shifted = MRI.createGenericVirtualRegister(Ty);
1028       unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty);
1029       MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
1030       MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
1031         .addDef(Shifted)
1032         .addUse(Res)
1033         .addUse(ShiftAmt);
1034       MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
1035     } else {
1036       MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
1037     }
1038     MI.eraseFromParent();
1039     return Legalized;
1040   }
1041   case TargetOpcode::G_FNEG: {
1042     // TODO: Handle vector types once we are able to
1043     // represent them.
1044     if (Ty.isVector())
1045       return UnableToLegalize;
1046     unsigned Res = MI.getOperand(0).getReg();
1047     Type *ZeroTy;
1048     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
1049     switch (Ty.getSizeInBits()) {
1050     case 16:
1051       ZeroTy = Type::getHalfTy(Ctx);
1052       break;
1053     case 32:
1054       ZeroTy = Type::getFloatTy(Ctx);
1055       break;
1056     case 64:
1057       ZeroTy = Type::getDoubleTy(Ctx);
1058       break;
1059     case 128:
1060       ZeroTy = Type::getFP128Ty(Ctx);
1061       break;
1062     default:
1063       llvm_unreachable("unexpected floating-point type");
1064     }
1065     ConstantFP &ZeroForNegation =
1066         *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
1067     auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
1068     MIRBuilder.buildInstr(TargetOpcode::G_FSUB)
1069         .addDef(Res)
1070         .addUse(Zero->getOperand(0).getReg())
1071         .addUse(MI.getOperand(1).getReg());
1072     MI.eraseFromParent();
1073     return Legalized;
1074   }
1075   case TargetOpcode::G_FSUB: {
1076     // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
1077     // First, check if G_FNEG is marked as Lower. If so, we may
1078     // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
1079     if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
1080       return UnableToLegalize;
1081     unsigned Res = MI.getOperand(0).getReg();
1082     unsigned LHS = MI.getOperand(1).getReg();
1083     unsigned RHS = MI.getOperand(2).getReg();
1084     unsigned Neg = MRI.createGenericVirtualRegister(Ty);
1085     MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
1086     MIRBuilder.buildInstr(TargetOpcode::G_FADD)
1087         .addDef(Res)
1088         .addUse(LHS)
1089         .addUse(Neg);
1090     MI.eraseFromParent();
1091     return Legalized;
1092   }
1093   case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1094     unsigned OldValRes = MI.getOperand(0).getReg();
1095     unsigned SuccessRes = MI.getOperand(1).getReg();
1096     unsigned Addr = MI.getOperand(2).getReg();
1097     unsigned CmpVal = MI.getOperand(3).getReg();
1098     unsigned NewVal = MI.getOperand(4).getReg();
1099     MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
1100                                   **MI.memoperands_begin());
1101     MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
1102     MI.eraseFromParent();
1103     return Legalized;
1104   }
1105   case TargetOpcode::G_LOAD:
1106   case TargetOpcode::G_SEXTLOAD:
1107   case TargetOpcode::G_ZEXTLOAD: {
1108     // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
1109     unsigned DstReg = MI.getOperand(0).getReg();
1110     unsigned PtrReg = MI.getOperand(1).getReg();
1111     LLT DstTy = MRI.getType(DstReg);
1112     auto &MMO = **MI.memoperands_begin();
1113 
1114     if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) {
1115       // In the case of G_LOAD, this was a non-extending load already and we're
1116       // about to lower to the same instruction.
1117       if (MI.getOpcode() == TargetOpcode::G_LOAD)
1118           return UnableToLegalize;
1119       MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
1120       MI.eraseFromParent();
1121       return Legalized;
1122     }
1123 
1124     if (DstTy.isScalar()) {
1125       unsigned TmpReg = MRI.createGenericVirtualRegister(
1126           LLT::scalar(MMO.getSize() /* in bytes */ * 8));
1127       MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1128       switch (MI.getOpcode()) {
1129       default:
1130         llvm_unreachable("Unexpected opcode");
1131       case TargetOpcode::G_LOAD:
1132         MIRBuilder.buildAnyExt(DstReg, TmpReg);
1133         break;
1134       case TargetOpcode::G_SEXTLOAD:
1135         MIRBuilder.buildSExt(DstReg, TmpReg);
1136         break;
1137       case TargetOpcode::G_ZEXTLOAD:
1138         MIRBuilder.buildZExt(DstReg, TmpReg);
1139         break;
1140       }
1141       MI.eraseFromParent();
1142       return Legalized;
1143     }
1144 
1145     return UnableToLegalize;
1146   }
1147   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1148   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1149   case TargetOpcode::G_CTLZ:
1150   case TargetOpcode::G_CTTZ:
1151   case TargetOpcode::G_CTPOP:
1152     return lowerBitCount(MI, TypeIdx, Ty);
1153   case G_UADDE: {
1154     unsigned Res = MI.getOperand(0).getReg();
1155     unsigned CarryOut = MI.getOperand(1).getReg();
1156     unsigned LHS = MI.getOperand(2).getReg();
1157     unsigned RHS = MI.getOperand(3).getReg();
1158     unsigned CarryIn = MI.getOperand(4).getReg();
1159 
1160     unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
1161     unsigned ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);
1162 
1163     MIRBuilder.buildAdd(TmpRes, LHS, RHS);
1164     MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
1165     MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
1166     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
1167 
1168     MI.eraseFromParent();
1169     return Legalized;
1170   }
1171   }
1172 }
1173 
1174 LegalizerHelper::LegalizeResult
1175 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
1176                                      LLT NarrowTy) {
1177   // FIXME: Don't know how to handle secondary types yet.
1178   if (TypeIdx != 0)
1179     return UnableToLegalize;
1180 
1181   MIRBuilder.setInstr(MI);
1182   switch (MI.getOpcode()) {
1183   default:
1184     return UnableToLegalize;
1185   case TargetOpcode::G_IMPLICIT_DEF: {
1186     SmallVector<unsigned, 2> DstRegs;
1187 
1188     unsigned NarrowSize = NarrowTy.getSizeInBits();
1189     unsigned DstReg = MI.getOperand(0).getReg();
1190     unsigned Size = MRI.getType(DstReg).getSizeInBits();
1191     int NumParts = Size / NarrowSize;
1192     // FIXME: Don't know how to handle the situation where the small vectors
1193     // aren't all the same size yet.
1194     if (Size % NarrowSize != 0)
1195       return UnableToLegalize;
1196 
1197     for (int i = 0; i < NumParts; ++i) {
1198       unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1199       MIRBuilder.buildUndef(TmpReg);
1200       DstRegs.push_back(TmpReg);
1201     }
1202 
1203     if (NarrowTy.isVector())
1204       MIRBuilder.buildConcatVectors(DstReg, DstRegs);
1205     else
1206       MIRBuilder.buildBuildVector(DstReg, DstRegs);
1207 
1208     MI.eraseFromParent();
1209     return Legalized;
1210   }
1211   case TargetOpcode::G_ADD: {
1212     unsigned NarrowSize = NarrowTy.getSizeInBits();
1213     unsigned DstReg = MI.getOperand(0).getReg();
1214     unsigned Size = MRI.getType(DstReg).getSizeInBits();
1215     int NumParts = Size / NarrowSize;
1216     // FIXME: Don't know how to handle the situation where the small vectors
1217     // aren't all the same size yet.
1218     if (Size % NarrowSize != 0)
1219       return UnableToLegalize;
1220 
1221     SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
1222     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
1223     extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
1224 
1225     for (int i = 0; i < NumParts; ++i) {
1226       unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
1227       MIRBuilder.buildAdd(DstReg, Src1Regs[i], Src2Regs[i]);
1228       DstRegs.push_back(DstReg);
1229     }
1230 
1231     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
1232     MI.eraseFromParent();
1233     return Legalized;
1234   }
1235   case TargetOpcode::G_LOAD:
1236   case TargetOpcode::G_STORE: {
1237     bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
1238     unsigned ValReg = MI.getOperand(0).getReg();
1239     unsigned AddrReg = MI.getOperand(1).getReg();
1240     unsigned NarrowSize = NarrowTy.getSizeInBits();
1241     unsigned Size = MRI.getType(ValReg).getSizeInBits();
1242     unsigned NumParts = Size / NarrowSize;
1243 
1244     SmallVector<unsigned, 8> NarrowRegs;
1245     if (!IsLoad)
1246       extractParts(ValReg, NarrowTy, NumParts, NarrowRegs);
1247 
1248     const LLT OffsetTy =
1249         LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
1250     MachineFunction &MF = *MI.getMF();
1251     MachineMemOperand *MMO = *MI.memoperands_begin();
1252     for (unsigned Idx = 0; Idx < NumParts; ++Idx) {
1253       unsigned Adjustment = Idx * NarrowTy.getSizeInBits() / 8;
1254       unsigned Alignment = MinAlign(MMO->getAlignment(), Adjustment);
1255       unsigned NewAddrReg = 0;
1256       MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, Adjustment);
1257       MachineMemOperand &NewMMO = *MF.getMachineMemOperand(
1258           MMO->getPointerInfo().getWithOffset(Adjustment), MMO->getFlags(),
1259           NarrowTy.getSizeInBits() / 8, Alignment);
1260       if (IsLoad) {
1261         unsigned Dst = MRI.createGenericVirtualRegister(NarrowTy);
1262         NarrowRegs.push_back(Dst);
1263         MIRBuilder.buildLoad(Dst, NewAddrReg, NewMMO);
1264       } else {
1265         MIRBuilder.buildStore(NarrowRegs[Idx], NewAddrReg, NewMMO);
1266       }
1267     }
1268     if (IsLoad) {
1269       if (NarrowTy.isVector())
1270         MIRBuilder.buildConcatVectors(ValReg, NarrowRegs);
1271       else
1272         MIRBuilder.buildBuildVector(ValReg, NarrowRegs);
1273     }
1274     MI.eraseFromParent();
1275     return Legalized;
1276   }
1277   }
1278 }
1279 
1280 LegalizerHelper::LegalizeResult
1281 LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
1282   unsigned Opc = MI.getOpcode();
1283   auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
1284   auto isSupported = [this](const LegalityQuery &Q) {
1285     auto QAction = LI.getAction(Q).Action;
1286     return QAction == Legal || QAction == Libcall || QAction == Custom;
1287   };
1288   switch (Opc) {
1289   default:
1290     return UnableToLegalize;
1291   case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1292     // This trivially expands to CTLZ.
1293     Observer.changingInstr(MI);
1294     MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
1295     Observer.changedInstr(MI);
1296     return Legalized;
1297   }
1298   case TargetOpcode::G_CTLZ: {
1299     unsigned SrcReg = MI.getOperand(1).getReg();
1300     unsigned Len = Ty.getSizeInBits();
1301     if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty}})) {
1302       // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
1303       auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
1304                                              {Ty}, {SrcReg});
1305       auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
1306       auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
1307       auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
1308                                           SrcReg, MIBZero);
1309       MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
1310                              MIBCtlzZU);
1311       MI.eraseFromParent();
1312       return Legalized;
1313     }
1314     // for now, we do this:
1315     // NewLen = NextPowerOf2(Len);
1316     // x = x | (x >> 1);
1317     // x = x | (x >> 2);
1318     // ...
1319     // x = x | (x >>16);
1320     // x = x | (x >>32); // for 64-bit input
1321     // Upto NewLen/2
1322     // return Len - popcount(x);
1323     //
1324     // Ref: "Hacker's Delight" by Henry Warren
1325     unsigned Op = SrcReg;
1326     unsigned NewLen = PowerOf2Ceil(Len);
1327     for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
1328       auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
1329       auto MIBOp = MIRBuilder.buildInstr(
1330           TargetOpcode::G_OR, {Ty},
1331           {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty},
1332                                      {Op, MIBShiftAmt})});
1333       Op = MIBOp->getOperand(0).getReg();
1334     }
1335     auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op});
1336     MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
1337                           {MIRBuilder.buildConstant(Ty, Len), MIBPop});
1338     MI.eraseFromParent();
1339     return Legalized;
1340   }
1341   case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
1342     // This trivially expands to CTTZ.
1343     Observer.changingInstr(MI);
1344     MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
1345     Observer.changedInstr(MI);
1346     return Legalized;
1347   }
1348   case TargetOpcode::G_CTTZ: {
1349     unsigned SrcReg = MI.getOperand(1).getReg();
1350     unsigned Len = Ty.getSizeInBits();
1351     if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty}})) {
1352       // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
1353       // zero.
1354       auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
1355                                              {Ty}, {SrcReg});
1356       auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
1357       auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
1358       auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
1359                                           SrcReg, MIBZero);
1360       MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
1361                              MIBCttzZU);
1362       MI.eraseFromParent();
1363       return Legalized;
1364     }
1365     // for now, we use: { return popcount(~x & (x - 1)); }
1366     // unless the target has ctlz but not ctpop, in which case we use:
1367     // { return 32 - nlz(~x & (x-1)); }
1368     // Ref: "Hacker's Delight" by Henry Warren
1369     auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
1370     auto MIBNot =
1371         MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1});
1372     auto MIBTmp = MIRBuilder.buildInstr(
1373         TargetOpcode::G_AND, {Ty},
1374         {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
1375                                        {SrcReg, MIBCstNeg1})});
1376     if (!isSupported({TargetOpcode::G_CTPOP, {Ty}}) &&
1377         isSupported({TargetOpcode::G_CTLZ, {Ty}})) {
1378       auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
1379       MIRBuilder.buildInstr(
1380           TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
1381           {MIBCstLen,
1382            MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})});
1383       MI.eraseFromParent();
1384       return Legalized;
1385     }
1386     MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
1387     MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
1388     return Legalized;
1389   }
1390   }
1391 }
1392