1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file implements the LegalizerHelper class to legalize
10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
11 /// primary legalization.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
16 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
17 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
18 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/CodeGen/TargetInstrInfo.h"
21 #include "llvm/CodeGen/TargetLowering.h"
22 #include "llvm/CodeGen/TargetSubtargetInfo.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
26 
27 #define DEBUG_TYPE "legalizer"
28 
29 using namespace llvm;
30 using namespace LegalizeActions;
31 
32 LegalizerHelper::LegalizerHelper(MachineFunction &MF,
33                                  GISelChangeObserver &Observer,
34                                  MachineIRBuilder &Builder)
35     : MIRBuilder(Builder), MRI(MF.getRegInfo()),
36       LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
37   MIRBuilder.setMF(MF);
38   MIRBuilder.setChangeObserver(Observer);
39 }
40 
41 LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
42                                  GISelChangeObserver &Observer,
43                                  MachineIRBuilder &B)
44     : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
45   MIRBuilder.setMF(MF);
46   MIRBuilder.setChangeObserver(Observer);
47 }
48 LegalizerHelper::LegalizeResult
49 LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
50   LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));
51 
52   auto Step = LI.getAction(MI, MRI);
53   switch (Step.Action) {
54   case Legal:
55     LLVM_DEBUG(dbgs() << ".. Already legal\n");
56     return AlreadyLegal;
57   case Libcall:
58     LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
59     return libcall(MI);
60   case NarrowScalar:
61     LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
62     return narrowScalar(MI, Step.TypeIdx, Step.NewType);
63   case WidenScalar:
64     LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
65     return widenScalar(MI, Step.TypeIdx, Step.NewType);
66   case Lower:
67     LLVM_DEBUG(dbgs() << ".. Lower\n");
68     return lower(MI, Step.TypeIdx, Step.NewType);
69   case FewerElements:
70     LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
71     return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
72   case Custom:
73     LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
74     return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
75                                                             : UnableToLegalize;
76   default:
77     LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
78     return UnableToLegalize;
79   }
80 }
81 
82 void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts,
83                                    SmallVectorImpl<unsigned> &VRegs) {
84   for (int i = 0; i < NumParts; ++i)
85     VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
86   MIRBuilder.buildUnmerge(VRegs, Reg);
87 }
88 
89 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
90   switch (Opcode) {
91   case TargetOpcode::G_SDIV:
92     assert((Size == 32 || Size == 64) && "Unsupported size");
93     return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32;
94   case TargetOpcode::G_UDIV:
95     assert((Size == 32 || Size == 64) && "Unsupported size");
96     return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32;
97   case TargetOpcode::G_SREM:
98     assert((Size == 32 || Size == 64) && "Unsupported size");
99     return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
100   case TargetOpcode::G_UREM:
101     assert((Size == 32 || Size == 64) && "Unsupported size");
102     return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32;
103   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
104     assert(Size == 32 && "Unsupported size");
105     return RTLIB::CTLZ_I32;
106   case TargetOpcode::G_FADD:
107     assert((Size == 32 || Size == 64) && "Unsupported size");
108     return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
109   case TargetOpcode::G_FSUB:
110     assert((Size == 32 || Size == 64) && "Unsupported size");
111     return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
112   case TargetOpcode::G_FMUL:
113     assert((Size == 32 || Size == 64) && "Unsupported size");
114     return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
115   case TargetOpcode::G_FDIV:
116     assert((Size == 32 || Size == 64) && "Unsupported size");
117     return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
118   case TargetOpcode::G_FREM:
119     return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
120   case TargetOpcode::G_FPOW:
121     return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
122   case TargetOpcode::G_FMA:
123     assert((Size == 32 || Size == 64) && "Unsupported size");
124     return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
125   }
126   llvm_unreachable("Unknown libcall function");
127 }
128 
129 LegalizerHelper::LegalizeResult
130 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
131                     const CallLowering::ArgInfo &Result,
132                     ArrayRef<CallLowering::ArgInfo> Args) {
133   auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
134   auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
135   const char *Name = TLI.getLibcallName(Libcall);
136 
137   MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
138   if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall),
139                      MachineOperand::CreateES(Name), Result, Args))
140     return LegalizerHelper::UnableToLegalize;
141 
142   return LegalizerHelper::Legalized;
143 }
144 
145 // Useful for libcalls where all operands have the same type.
146 static LegalizerHelper::LegalizeResult
147 simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
148               Type *OpType) {
149   auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
150 
151   SmallVector<CallLowering::ArgInfo, 3> Args;
152   for (unsigned i = 1; i < MI.getNumOperands(); i++)
153     Args.push_back({MI.getOperand(i).getReg(), OpType});
154   return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
155                        Args);
156 }
157 
158 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
159                                        Type *FromType) {
160   auto ToMVT = MVT::getVT(ToType);
161   auto FromMVT = MVT::getVT(FromType);
162 
163   switch (Opcode) {
164   case TargetOpcode::G_FPEXT:
165     return RTLIB::getFPEXT(FromMVT, ToMVT);
166   case TargetOpcode::G_FPTRUNC:
167     return RTLIB::getFPROUND(FromMVT, ToMVT);
168   case TargetOpcode::G_FPTOSI:
169     return RTLIB::getFPTOSINT(FromMVT, ToMVT);
170   case TargetOpcode::G_FPTOUI:
171     return RTLIB::getFPTOUINT(FromMVT, ToMVT);
172   case TargetOpcode::G_SITOFP:
173     return RTLIB::getSINTTOFP(FromMVT, ToMVT);
174   case TargetOpcode::G_UITOFP:
175     return RTLIB::getUINTTOFP(FromMVT, ToMVT);
176   }
177   llvm_unreachable("Unsupported libcall function");
178 }
179 
180 static LegalizerHelper::LegalizeResult
181 conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
182                   Type *FromType) {
183   RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
184   return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
185                        {{MI.getOperand(1).getReg(), FromType}});
186 }
187 
188 LegalizerHelper::LegalizeResult
189 LegalizerHelper::libcall(MachineInstr &MI) {
190   LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
191   unsigned Size = LLTy.getSizeInBits();
192   auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
193 
194   MIRBuilder.setInstr(MI);
195 
196   switch (MI.getOpcode()) {
197   default:
198     return UnableToLegalize;
199   case TargetOpcode::G_SDIV:
200   case TargetOpcode::G_UDIV:
201   case TargetOpcode::G_SREM:
202   case TargetOpcode::G_UREM:
203   case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
204     Type *HLTy = IntegerType::get(Ctx, Size);
205     auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
206     if (Status != Legalized)
207       return Status;
208     break;
209   }
210   case TargetOpcode::G_FADD:
211   case TargetOpcode::G_FSUB:
212   case TargetOpcode::G_FMUL:
213   case TargetOpcode::G_FDIV:
214   case TargetOpcode::G_FMA:
215   case TargetOpcode::G_FPOW:
216   case TargetOpcode::G_FREM: {
217     Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
218     auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
219     if (Status != Legalized)
220       return Status;
221     break;
222   }
223   case TargetOpcode::G_FPEXT: {
224     // FIXME: Support other floating point types (half, fp128 etc)
225     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
226     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
227     if (ToSize != 64 || FromSize != 32)
228       return UnableToLegalize;
229     LegalizeResult Status = conversionLibcall(
230         MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx));
231     if (Status != Legalized)
232       return Status;
233     break;
234   }
235   case TargetOpcode::G_FPTRUNC: {
236     // FIXME: Support other floating point types (half, fp128 etc)
237     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
238     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
239     if (ToSize != 32 || FromSize != 64)
240       return UnableToLegalize;
241     LegalizeResult Status = conversionLibcall(
242         MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx));
243     if (Status != Legalized)
244       return Status;
245     break;
246   }
247   case TargetOpcode::G_FPTOSI:
248   case TargetOpcode::G_FPTOUI: {
249     // FIXME: Support other types
250     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
251     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
252     if (ToSize != 32 || (FromSize != 32 && FromSize != 64))
253       return UnableToLegalize;
254     LegalizeResult Status = conversionLibcall(
255         MI, MIRBuilder, Type::getInt32Ty(Ctx),
256         FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
257     if (Status != Legalized)
258       return Status;
259     break;
260   }
261   case TargetOpcode::G_SITOFP:
262   case TargetOpcode::G_UITOFP: {
263     // FIXME: Support other types
264     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
265     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
266     if (FromSize != 32 || (ToSize != 32 && ToSize != 64))
267       return UnableToLegalize;
268     LegalizeResult Status = conversionLibcall(
269         MI, MIRBuilder,
270         ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
271         Type::getInt32Ty(Ctx));
272     if (Status != Legalized)
273       return Status;
274     break;
275   }
276   }
277 
278   MI.eraseFromParent();
279   return Legalized;
280 }
281 
282 LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
283                                                               unsigned TypeIdx,
284                                                               LLT NarrowTy) {
285   MIRBuilder.setInstr(MI);
286 
287   uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
288   uint64_t NarrowSize = NarrowTy.getSizeInBits();
289 
290   switch (MI.getOpcode()) {
291   default:
292     return UnableToLegalize;
293   case TargetOpcode::G_IMPLICIT_DEF: {
294     // FIXME: add support for when SizeOp0 isn't an exact multiple of
295     // NarrowSize.
296     if (SizeOp0 % NarrowSize != 0)
297       return UnableToLegalize;
298     int NumParts = SizeOp0 / NarrowSize;
299 
300     SmallVector<unsigned, 2> DstRegs;
301     for (int i = 0; i < NumParts; ++i)
302       DstRegs.push_back(
303           MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());
304 
305     unsigned DstReg = MI.getOperand(0).getReg();
306     if(MRI.getType(DstReg).isVector())
307       MIRBuilder.buildBuildVector(DstReg, DstRegs);
308     else
309       MIRBuilder.buildMerge(DstReg, DstRegs);
310     MI.eraseFromParent();
311     return Legalized;
312   }
313   case TargetOpcode::G_ADD: {
314     // FIXME: add support for when SizeOp0 isn't an exact multiple of
315     // NarrowSize.
316     if (SizeOp0 % NarrowSize != 0)
317       return UnableToLegalize;
318     // Expand in terms of carry-setting/consuming G_ADDE instructions.
319     int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
320 
321     SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
322     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
323     extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
324 
325     unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
326     MIRBuilder.buildConstant(CarryIn, 0);
327 
328     for (int i = 0; i < NumParts; ++i) {
329       unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
330       unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
331 
332       MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
333                             Src2Regs[i], CarryIn);
334 
335       DstRegs.push_back(DstReg);
336       CarryIn = CarryOut;
337     }
338     unsigned DstReg = MI.getOperand(0).getReg();
339     if(MRI.getType(DstReg).isVector())
340       MIRBuilder.buildBuildVector(DstReg, DstRegs);
341     else
342       MIRBuilder.buildMerge(DstReg, DstRegs);
343     MI.eraseFromParent();
344     return Legalized;
345   }
346   case TargetOpcode::G_EXTRACT: {
347     if (TypeIdx != 1)
348       return UnableToLegalize;
349 
350     int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
351     // FIXME: add support for when SizeOp1 isn't an exact multiple of
352     // NarrowSize.
353     if (SizeOp1 % NarrowSize != 0)
354       return UnableToLegalize;
355     int NumParts = SizeOp1 / NarrowSize;
356 
357     SmallVector<unsigned, 2> SrcRegs, DstRegs;
358     SmallVector<uint64_t, 2> Indexes;
359     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
360 
361     unsigned OpReg = MI.getOperand(0).getReg();
362     uint64_t OpStart = MI.getOperand(2).getImm();
363     uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
364     for (int i = 0; i < NumParts; ++i) {
365       unsigned SrcStart = i * NarrowSize;
366 
367       if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
368         // No part of the extract uses this subregister, ignore it.
369         continue;
370       } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
371         // The entire subregister is extracted, forward the value.
372         DstRegs.push_back(SrcRegs[i]);
373         continue;
374       }
375 
376       // OpSegStart is where this destination segment would start in OpReg if it
377       // extended infinitely in both directions.
378       int64_t ExtractOffset;
379       uint64_t SegSize;
380       if (OpStart < SrcStart) {
381         ExtractOffset = 0;
382         SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
383       } else {
384         ExtractOffset = OpStart - SrcStart;
385         SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
386       }
387 
388       unsigned SegReg = SrcRegs[i];
389       if (ExtractOffset != 0 || SegSize != NarrowSize) {
390         // A genuine extract is needed.
391         SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
392         MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
393       }
394 
395       DstRegs.push_back(SegReg);
396     }
397 
398     unsigned DstReg = MI.getOperand(0).getReg();
399     if(MRI.getType(DstReg).isVector())
400       MIRBuilder.buildBuildVector(DstReg, DstRegs);
401     else
402       MIRBuilder.buildMerge(DstReg, DstRegs);
403     MI.eraseFromParent();
404     return Legalized;
405   }
406   case TargetOpcode::G_INSERT: {
407     // FIXME: Don't know how to handle secondary types yet.
408     if (TypeIdx != 0)
409       return UnableToLegalize;
410 
411     // FIXME: add support for when SizeOp0 isn't an exact multiple of
412     // NarrowSize.
413     if (SizeOp0 % NarrowSize != 0)
414       return UnableToLegalize;
415 
416     int NumParts = SizeOp0 / NarrowSize;
417 
418     SmallVector<unsigned, 2> SrcRegs, DstRegs;
419     SmallVector<uint64_t, 2> Indexes;
420     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
421 
422     unsigned OpReg = MI.getOperand(2).getReg();
423     uint64_t OpStart = MI.getOperand(3).getImm();
424     uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
425     for (int i = 0; i < NumParts; ++i) {
426       unsigned DstStart = i * NarrowSize;
427 
428       if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
429         // No part of the insert affects this subregister, forward the original.
430         DstRegs.push_back(SrcRegs[i]);
431         continue;
432       } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
433         // The entire subregister is defined by this insert, forward the new
434         // value.
435         DstRegs.push_back(OpReg);
436         continue;
437       }
438 
439       // OpSegStart is where this destination segment would start in OpReg if it
440       // extended infinitely in both directions.
441       int64_t ExtractOffset, InsertOffset;
442       uint64_t SegSize;
443       if (OpStart < DstStart) {
444         InsertOffset = 0;
445         ExtractOffset = DstStart - OpStart;
446         SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
447       } else {
448         InsertOffset = OpStart - DstStart;
449         ExtractOffset = 0;
450         SegSize =
451             std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
452       }
453 
454       unsigned SegReg = OpReg;
455       if (ExtractOffset != 0 || SegSize != OpSize) {
456         // A genuine extract is needed.
457         SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
458         MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
459       }
460 
461       unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
462       MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
463       DstRegs.push_back(DstReg);
464     }
465 
466     assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
467     unsigned DstReg = MI.getOperand(0).getReg();
468     if(MRI.getType(DstReg).isVector())
469       MIRBuilder.buildBuildVector(DstReg, DstRegs);
470     else
471       MIRBuilder.buildMerge(DstReg, DstRegs);
472     MI.eraseFromParent();
473     return Legalized;
474   }
475   case TargetOpcode::G_LOAD: {
476     // FIXME: add support for when SizeOp0 isn't an exact multiple of
477     // NarrowSize.
478     if (SizeOp0 % NarrowSize != 0)
479       return UnableToLegalize;
480 
481     const auto &MMO = **MI.memoperands_begin();
482     // This implementation doesn't work for atomics. Give up instead of doing
483     // something invalid.
484     if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
485         MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
486       return UnableToLegalize;
487 
488     int NumParts = SizeOp0 / NarrowSize;
489     LLT OffsetTy = LLT::scalar(
490         MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());
491 
492     SmallVector<unsigned, 2> DstRegs;
493     for (int i = 0; i < NumParts; ++i) {
494       unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
495       unsigned SrcReg = 0;
496       unsigned Adjustment = i * NarrowSize / 8;
497       unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);
498 
499       MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
500           MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
501           NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
502           MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());
503 
504       MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy,
505                                 Adjustment);
506 
507       MIRBuilder.buildLoad(DstReg, SrcReg, *SplitMMO);
508 
509       DstRegs.push_back(DstReg);
510     }
511     unsigned DstReg = MI.getOperand(0).getReg();
512     if(MRI.getType(DstReg).isVector())
513       MIRBuilder.buildBuildVector(DstReg, DstRegs);
514     else
515       MIRBuilder.buildMerge(DstReg, DstRegs);
516     MI.eraseFromParent();
517     return Legalized;
518   }
519   case TargetOpcode::G_ZEXTLOAD:
520   case TargetOpcode::G_SEXTLOAD: {
521     bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
522     unsigned DstReg = MI.getOperand(0).getReg();
523     unsigned PtrReg = MI.getOperand(1).getReg();
524 
525     unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
526     auto &MMO = **MI.memoperands_begin();
527     if (MMO.getSize() * 8 == NarrowSize) {
528       MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
529     } else {
530       unsigned ExtLoad = ZExt ? TargetOpcode::G_ZEXTLOAD
531         : TargetOpcode::G_SEXTLOAD;
532       MIRBuilder.buildInstr(ExtLoad)
533         .addDef(TmpReg)
534         .addUse(PtrReg)
535         .addMemOperand(&MMO);
536     }
537 
538     if (ZExt)
539       MIRBuilder.buildZExt(DstReg, TmpReg);
540     else
541       MIRBuilder.buildSExt(DstReg, TmpReg);
542 
543     MI.eraseFromParent();
544     return Legalized;
545   }
546   case TargetOpcode::G_STORE: {
547     // FIXME: add support for when SizeOp0 isn't an exact multiple of
548     // NarrowSize.
549     if (SizeOp0 % NarrowSize != 0)
550       return UnableToLegalize;
551 
552     const auto &MMO = **MI.memoperands_begin();
553     // This implementation doesn't work for atomics. Give up instead of doing
554     // something invalid.
555     if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
556         MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
557       return UnableToLegalize;
558 
559     int NumParts = SizeOp0 / NarrowSize;
560     LLT OffsetTy = LLT::scalar(
561         MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());
562 
563     SmallVector<unsigned, 2> SrcRegs;
564     extractParts(MI.getOperand(0).getReg(), NarrowTy, NumParts, SrcRegs);
565 
566     for (int i = 0; i < NumParts; ++i) {
567       unsigned DstReg = 0;
568       unsigned Adjustment = i * NarrowSize / 8;
569       unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);
570 
571       MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
572           MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
573           NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
574           MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());
575 
576       MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy,
577                                 Adjustment);
578 
579       MIRBuilder.buildStore(SrcRegs[i], DstReg, *SplitMMO);
580     }
581     MI.eraseFromParent();
582     return Legalized;
583   }
584   case TargetOpcode::G_CONSTANT: {
585     // FIXME: add support for when SizeOp0 isn't an exact multiple of
586     // NarrowSize.
587     if (SizeOp0 % NarrowSize != 0)
588       return UnableToLegalize;
589     int NumParts = SizeOp0 / NarrowSize;
590     const APInt &Cst = MI.getOperand(1).getCImm()->getValue();
591     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
592 
593     SmallVector<unsigned, 2> DstRegs;
594     for (int i = 0; i < NumParts; ++i) {
595       unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
596       ConstantInt *CI =
597           ConstantInt::get(Ctx, Cst.lshr(NarrowSize * i).trunc(NarrowSize));
598       MIRBuilder.buildConstant(DstReg, *CI);
599       DstRegs.push_back(DstReg);
600     }
601     unsigned DstReg = MI.getOperand(0).getReg();
602     if(MRI.getType(DstReg).isVector())
603       MIRBuilder.buildBuildVector(DstReg, DstRegs);
604     else
605       MIRBuilder.buildMerge(DstReg, DstRegs);
606     MI.eraseFromParent();
607     return Legalized;
608   }
609   case TargetOpcode::G_AND:
610   case TargetOpcode::G_OR:
611   case TargetOpcode::G_XOR: {
612     // Legalize bitwise operation:
613     // A = BinOp<Ty> B, C
614     // into:
615     // B1, ..., BN = G_UNMERGE_VALUES B
616     // C1, ..., CN = G_UNMERGE_VALUES C
617     // A1 = BinOp<Ty/N> B1, C2
618     // ...
619     // AN = BinOp<Ty/N> BN, CN
620     // A = G_MERGE_VALUES A1, ..., AN
621 
622     // FIXME: add support for when SizeOp0 isn't an exact multiple of
623     // NarrowSize.
624     if (SizeOp0 % NarrowSize != 0)
625       return UnableToLegalize;
626     int NumParts = SizeOp0 / NarrowSize;
627 
628     // List the registers where the destination will be scattered.
629     SmallVector<unsigned, 2> DstRegs;
630     // List the registers where the first argument will be split.
631     SmallVector<unsigned, 2> SrcsReg1;
632     // List the registers where the second argument will be split.
633     SmallVector<unsigned, 2> SrcsReg2;
634     // Create all the temporary registers.
635     for (int i = 0; i < NumParts; ++i) {
636       unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
637       unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy);
638       unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy);
639 
640       DstRegs.push_back(DstReg);
641       SrcsReg1.push_back(SrcReg1);
642       SrcsReg2.push_back(SrcReg2);
643     }
644     // Explode the big arguments into smaller chunks.
645     MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg());
646     MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg());
647 
648     // Do the operation on each small part.
649     for (int i = 0; i < NumParts; ++i)
650       MIRBuilder.buildInstr(MI.getOpcode(), {DstRegs[i]},
651                             {SrcsReg1[i], SrcsReg2[i]});
652 
653     // Gather the destination registers into the final destination.
654     unsigned DstReg = MI.getOperand(0).getReg();
655     if(MRI.getType(DstReg).isVector())
656       MIRBuilder.buildBuildVector(DstReg, DstRegs);
657     else
658       MIRBuilder.buildMerge(DstReg, DstRegs);
659     MI.eraseFromParent();
660     return Legalized;
661   }
662   case TargetOpcode::G_SHL:
663   case TargetOpcode::G_LSHR:
664   case TargetOpcode::G_ASHR: {
665     if (TypeIdx != 1)
666       return UnableToLegalize; // TODO
667     narrowScalarSrc(MI, NarrowTy, 2);
668     return Legalized;
669   }
670   }
671 }
672 
673 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
674                                      unsigned OpIdx, unsigned ExtOpcode) {
675   MachineOperand &MO = MI.getOperand(OpIdx);
676   auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()});
677   MO.setReg(ExtB->getOperand(0).getReg());
678 }
679 
680 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
681                                       unsigned OpIdx) {
682   MachineOperand &MO = MI.getOperand(OpIdx);
683   auto ExtB = MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {NarrowTy},
684                                     {MO.getReg()});
685   MO.setReg(ExtB->getOperand(0).getReg());
686 }
687 
688 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
689                                      unsigned OpIdx, unsigned TruncOpcode) {
690   MachineOperand &MO = MI.getOperand(OpIdx);
691   unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
692   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
693   MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt});
694   MO.setReg(DstExt);
695 }
696 
697 LegalizerHelper::LegalizeResult
698 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
699   MIRBuilder.setInstr(MI);
700 
701   switch (MI.getOpcode()) {
702   default:
703     return UnableToLegalize;
704   case TargetOpcode::G_UADDO:
705   case TargetOpcode::G_USUBO: {
706     if (TypeIdx == 1)
707       return UnableToLegalize; // TODO
708     auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
709                                          {MI.getOperand(2).getReg()});
710     auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
711                                          {MI.getOperand(3).getReg()});
712     unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
713                           ? TargetOpcode::G_ADD
714                           : TargetOpcode::G_SUB;
715     // Do the arithmetic in the larger type.
716     auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
717     LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
718     APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits());
719     auto AndOp = MIRBuilder.buildInstr(
720         TargetOpcode::G_AND, {WideTy},
721         {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
722     // There is no overflow if the AndOp is the same as NewOp.
723     MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp,
724                          AndOp);
725     // Now trunc the NewOp to the original result.
726     MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
727     MI.eraseFromParent();
728     return Legalized;
729   }
730   case TargetOpcode::G_CTTZ:
731   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
732   case TargetOpcode::G_CTLZ:
733   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
734   case TargetOpcode::G_CTPOP: {
735     // First ZEXT the input.
736     auto MIBSrc = MIRBuilder.buildZExt(WideTy, MI.getOperand(1).getReg());
737     LLT CurTy = MRI.getType(MI.getOperand(0).getReg());
738     if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
739       // The count is the same in the larger type except if the original
740       // value was zero.  This can be handled by setting the bit just off
741       // the top of the original type.
742       auto TopBit =
743           APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
744       MIBSrc = MIRBuilder.buildInstr(
745           TargetOpcode::G_OR, {WideTy},
746           {MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit.getSExtValue())});
747     }
748     // Perform the operation at the larger size.
749     auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
750     // This is already the correct result for CTPOP and CTTZs
751     if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
752         MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
753       // The correct result is NewOp - (Difference in widety and current ty).
754       unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
755       MIBNewOp = MIRBuilder.buildInstr(
756           TargetOpcode::G_SUB, {WideTy},
757           {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
758     }
759     auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
760     // Make the original instruction a trunc now, and update its source.
761     Observer.changingInstr(MI);
762     MI.setDesc(TII.get(TargetOpcode::G_TRUNC));
763     MI.getOperand(1).setReg(MIBNewOp->getOperand(0).getReg());
764     Observer.changedInstr(MI);
765     return Legalized;
766   }
767 
768   case TargetOpcode::G_ADD:
769   case TargetOpcode::G_AND:
770   case TargetOpcode::G_MUL:
771   case TargetOpcode::G_OR:
772   case TargetOpcode::G_XOR:
773   case TargetOpcode::G_SUB:
774     // Perform operation at larger width (any extension is fine here, high bits
775     // don't affect the result) and then truncate the result back to the
776     // original type.
777     Observer.changingInstr(MI);
778     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
779     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
780     widenScalarDst(MI, WideTy);
781     Observer.changedInstr(MI);
782     return Legalized;
783 
784   case TargetOpcode::G_SHL:
785       Observer.changingInstr(MI);
786 
787     if (TypeIdx == 0) {
788       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
789       widenScalarDst(MI, WideTy);
790     } else {
791       assert(TypeIdx == 1);
792       // The "number of bits to shift" operand must preserve its value as an
793       // unsigned integer:
794       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
795     }
796 
797     Observer.changedInstr(MI);
798     return Legalized;
799 
800   case TargetOpcode::G_SDIV:
801   case TargetOpcode::G_SREM:
802     Observer.changingInstr(MI);
803     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
804     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
805     widenScalarDst(MI, WideTy);
806     Observer.changedInstr(MI);
807     return Legalized;
808 
809   case TargetOpcode::G_ASHR:
810   case TargetOpcode::G_LSHR:
811     Observer.changingInstr(MI);
812 
813     if (TypeIdx == 0) {
814       unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
815         TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
816 
817       widenScalarSrc(MI, WideTy, 1, CvtOp);
818       widenScalarDst(MI, WideTy);
819     } else {
820       assert(TypeIdx == 1);
821       // The "number of bits to shift" operand must preserve its value as an
822       // unsigned integer:
823       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
824     }
825 
826     Observer.changedInstr(MI);
827     return Legalized;
828   case TargetOpcode::G_UDIV:
829   case TargetOpcode::G_UREM:
830     Observer.changingInstr(MI);
831     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
832     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
833     widenScalarDst(MI, WideTy);
834     Observer.changedInstr(MI);
835     return Legalized;
836 
837   case TargetOpcode::G_SELECT:
838     Observer.changingInstr(MI);
839     if (TypeIdx == 0) {
840       // Perform operation at larger width (any extension is fine here, high
841       // bits don't affect the result) and then truncate the result back to the
842       // original type.
843       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
844       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
845       widenScalarDst(MI, WideTy);
846     } else {
847       // Explicit extension is required here since high bits affect the result.
848       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
849     }
850     Observer.changedInstr(MI);
851     return Legalized;
852 
853   case TargetOpcode::G_FPTOSI:
854   case TargetOpcode::G_FPTOUI:
855     if (TypeIdx != 0)
856       return UnableToLegalize;
857     Observer.changingInstr(MI);
858     widenScalarDst(MI, WideTy);
859     Observer.changedInstr(MI);
860     return Legalized;
861 
862   case TargetOpcode::G_SITOFP:
863     if (TypeIdx != 1)
864       return UnableToLegalize;
865     Observer.changingInstr(MI);
866     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
867     Observer.changedInstr(MI);
868     return Legalized;
869 
870   case TargetOpcode::G_UITOFP:
871     if (TypeIdx != 1)
872       return UnableToLegalize;
873     Observer.changingInstr(MI);
874     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
875     Observer.changedInstr(MI);
876     return Legalized;
877 
878   case TargetOpcode::G_INSERT:
879     if (TypeIdx != 0)
880       return UnableToLegalize;
881     Observer.changingInstr(MI);
882     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
883     widenScalarDst(MI, WideTy);
884     Observer.changedInstr(MI);
885     return Legalized;
886 
887   case TargetOpcode::G_LOAD:
888     // For some types like i24, we might try to widen to i32. To properly handle
889     // this we should be using a dedicated extending load, until then avoid
890     // trying to legalize.
891     if (alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) !=
892         WideTy.getSizeInBits())
893       return UnableToLegalize;
894     LLVM_FALLTHROUGH;
895   case TargetOpcode::G_SEXTLOAD:
896   case TargetOpcode::G_ZEXTLOAD:
897     Observer.changingInstr(MI);
898     widenScalarDst(MI, WideTy);
899     Observer.changedInstr(MI);
900     return Legalized;
901 
902   case TargetOpcode::G_STORE: {
903     if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(1) ||
904         WideTy != LLT::scalar(8))
905       return UnableToLegalize;
906 
907     Observer.changingInstr(MI);
908     widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ZEXT);
909     Observer.changedInstr(MI);
910     return Legalized;
911   }
912   case TargetOpcode::G_CONSTANT: {
913     MachineOperand &SrcMO = MI.getOperand(1);
914     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
915     const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
916     Observer.changingInstr(MI);
917     SrcMO.setCImm(ConstantInt::get(Ctx, Val));
918 
919     widenScalarDst(MI, WideTy);
920     Observer.changedInstr(MI);
921     return Legalized;
922   }
923   case TargetOpcode::G_FCONSTANT: {
924     MachineOperand &SrcMO = MI.getOperand(1);
925     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
926     APFloat Val = SrcMO.getFPImm()->getValueAPF();
927     bool LosesInfo;
928     switch (WideTy.getSizeInBits()) {
929     case 32:
930       Val.convert(APFloat::IEEEsingle(), APFloat::rmTowardZero, &LosesInfo);
931       break;
932     case 64:
933       Val.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &LosesInfo);
934       break;
935     default:
936       llvm_unreachable("Unhandled fp widen type");
937     }
938     Observer.changingInstr(MI);
939     SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
940 
941     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
942     Observer.changedInstr(MI);
943     return Legalized;
944   }
945   case TargetOpcode::G_IMPLICIT_DEF: {
946     Observer.changingInstr(MI);
947     widenScalarDst(MI, WideTy);
948     Observer.changedInstr(MI);
949     return Legalized;
950   }
951   case TargetOpcode::G_BRCOND:
952     Observer.changingInstr(MI);
953     widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
954     Observer.changedInstr(MI);
955     return Legalized;
956 
957   case TargetOpcode::G_FCMP:
958     Observer.changingInstr(MI);
959     if (TypeIdx == 0)
960       widenScalarDst(MI, WideTy);
961     else {
962       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
963       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
964     }
965     Observer.changedInstr(MI);
966     return Legalized;
967 
968   case TargetOpcode::G_ICMP:
969     Observer.changingInstr(MI);
970     if (TypeIdx == 0)
971       widenScalarDst(MI, WideTy);
972     else {
973       unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
974                                MI.getOperand(1).getPredicate()))
975                                ? TargetOpcode::G_SEXT
976                                : TargetOpcode::G_ZEXT;
977       widenScalarSrc(MI, WideTy, 2, ExtOpcode);
978       widenScalarSrc(MI, WideTy, 3, ExtOpcode);
979     }
980     Observer.changedInstr(MI);
981     return Legalized;
982 
983   case TargetOpcode::G_GEP:
984     assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
985     Observer.changingInstr(MI);
986     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
987     Observer.changedInstr(MI);
988     return Legalized;
989 
990   case TargetOpcode::G_PHI: {
991     assert(TypeIdx == 0 && "Expecting only Idx 0");
992 
993     Observer.changingInstr(MI);
994     for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
995       MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
996       MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
997       widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
998     }
999 
1000     MachineBasicBlock &MBB = *MI.getParent();
1001     MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
1002     widenScalarDst(MI, WideTy);
1003     Observer.changedInstr(MI);
1004     return Legalized;
1005   }
1006   case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
1007     if (TypeIdx == 0) {
1008       unsigned VecReg = MI.getOperand(1).getReg();
1009       LLT VecTy = MRI.getType(VecReg);
1010       Observer.changingInstr(MI);
1011 
1012       widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
1013                                      WideTy.getSizeInBits()),
1014                      1, TargetOpcode::G_SEXT);
1015 
1016       widenScalarDst(MI, WideTy, 0);
1017       Observer.changedInstr(MI);
1018       return Legalized;
1019     }
1020 
1021     if (TypeIdx != 2)
1022       return UnableToLegalize;
1023     Observer.changingInstr(MI);
1024     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1025     Observer.changedInstr(MI);
1026     return Legalized;
1027   }
1028   case TargetOpcode::G_FADD:
1029   case TargetOpcode::G_FMUL:
1030   case TargetOpcode::G_FSUB:
1031   case TargetOpcode::G_FMA:
1032   case TargetOpcode::G_FNEG:
1033   case TargetOpcode::G_FABS:
1034   case TargetOpcode::G_FDIV:
1035   case TargetOpcode::G_FREM:
1036   case TargetOpcode::G_FCEIL:
1037     assert(TypeIdx == 0);
1038     Observer.changingInstr(MI);
1039 
1040     for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
1041       widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
1042 
1043     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
1044     Observer.changedInstr(MI);
1045     return Legalized;
1046   }
1047 }
1048 
1049 LegalizerHelper::LegalizeResult
1050 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
1051   using namespace TargetOpcode;
1052   MIRBuilder.setInstr(MI);
1053 
1054   switch(MI.getOpcode()) {
1055   default:
1056     return UnableToLegalize;
1057   case TargetOpcode::G_SREM:
1058   case TargetOpcode::G_UREM: {
1059     unsigned QuotReg = MRI.createGenericVirtualRegister(Ty);
1060     MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
1061         .addDef(QuotReg)
1062         .addUse(MI.getOperand(1).getReg())
1063         .addUse(MI.getOperand(2).getReg());
1064 
1065     unsigned ProdReg = MRI.createGenericVirtualRegister(Ty);
1066     MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
1067     MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
1068                         ProdReg);
1069     MI.eraseFromParent();
1070     return Legalized;
1071   }
1072   case TargetOpcode::G_SMULO:
1073   case TargetOpcode::G_UMULO: {
1074     // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
1075     // result.
1076     unsigned Res = MI.getOperand(0).getReg();
1077     unsigned Overflow = MI.getOperand(1).getReg();
1078     unsigned LHS = MI.getOperand(2).getReg();
1079     unsigned RHS = MI.getOperand(3).getReg();
1080 
1081     MIRBuilder.buildMul(Res, LHS, RHS);
1082 
1083     unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
1084                           ? TargetOpcode::G_SMULH
1085                           : TargetOpcode::G_UMULH;
1086 
1087     unsigned HiPart = MRI.createGenericVirtualRegister(Ty);
1088     MIRBuilder.buildInstr(Opcode)
1089       .addDef(HiPart)
1090       .addUse(LHS)
1091       .addUse(RHS);
1092 
1093     unsigned Zero = MRI.createGenericVirtualRegister(Ty);
1094     MIRBuilder.buildConstant(Zero, 0);
1095 
1096     // For *signed* multiply, overflow is detected by checking:
1097     // (hi != (lo >> bitwidth-1))
1098     if (Opcode == TargetOpcode::G_SMULH) {
1099       unsigned Shifted = MRI.createGenericVirtualRegister(Ty);
1100       unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty);
1101       MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
1102       MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
1103         .addDef(Shifted)
1104         .addUse(Res)
1105         .addUse(ShiftAmt);
1106       MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
1107     } else {
1108       MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
1109     }
1110     MI.eraseFromParent();
1111     return Legalized;
1112   }
1113   case TargetOpcode::G_FNEG: {
1114     // TODO: Handle vector types once we are able to
1115     // represent them.
1116     if (Ty.isVector())
1117       return UnableToLegalize;
1118     unsigned Res = MI.getOperand(0).getReg();
1119     Type *ZeroTy;
1120     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
1121     switch (Ty.getSizeInBits()) {
1122     case 16:
1123       ZeroTy = Type::getHalfTy(Ctx);
1124       break;
1125     case 32:
1126       ZeroTy = Type::getFloatTy(Ctx);
1127       break;
1128     case 64:
1129       ZeroTy = Type::getDoubleTy(Ctx);
1130       break;
1131     case 128:
1132       ZeroTy = Type::getFP128Ty(Ctx);
1133       break;
1134     default:
1135       llvm_unreachable("unexpected floating-point type");
1136     }
1137     ConstantFP &ZeroForNegation =
1138         *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
1139     auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
1140     MIRBuilder.buildInstr(TargetOpcode::G_FSUB)
1141         .addDef(Res)
1142         .addUse(Zero->getOperand(0).getReg())
1143         .addUse(MI.getOperand(1).getReg());
1144     MI.eraseFromParent();
1145     return Legalized;
1146   }
1147   case TargetOpcode::G_FSUB: {
1148     // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
1149     // First, check if G_FNEG is marked as Lower. If so, we may
1150     // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
1151     if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
1152       return UnableToLegalize;
1153     unsigned Res = MI.getOperand(0).getReg();
1154     unsigned LHS = MI.getOperand(1).getReg();
1155     unsigned RHS = MI.getOperand(2).getReg();
1156     unsigned Neg = MRI.createGenericVirtualRegister(Ty);
1157     MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
1158     MIRBuilder.buildInstr(TargetOpcode::G_FADD)
1159         .addDef(Res)
1160         .addUse(LHS)
1161         .addUse(Neg);
1162     MI.eraseFromParent();
1163     return Legalized;
1164   }
1165   case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1166     unsigned OldValRes = MI.getOperand(0).getReg();
1167     unsigned SuccessRes = MI.getOperand(1).getReg();
1168     unsigned Addr = MI.getOperand(2).getReg();
1169     unsigned CmpVal = MI.getOperand(3).getReg();
1170     unsigned NewVal = MI.getOperand(4).getReg();
1171     MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
1172                                   **MI.memoperands_begin());
1173     MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
1174     MI.eraseFromParent();
1175     return Legalized;
1176   }
1177   case TargetOpcode::G_LOAD:
1178   case TargetOpcode::G_SEXTLOAD:
1179   case TargetOpcode::G_ZEXTLOAD: {
1180     // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
1181     unsigned DstReg = MI.getOperand(0).getReg();
1182     unsigned PtrReg = MI.getOperand(1).getReg();
1183     LLT DstTy = MRI.getType(DstReg);
1184     auto &MMO = **MI.memoperands_begin();
1185 
1186     if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) {
1187       // In the case of G_LOAD, this was a non-extending load already and we're
1188       // about to lower to the same instruction.
1189       if (MI.getOpcode() == TargetOpcode::G_LOAD)
1190           return UnableToLegalize;
1191       MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
1192       MI.eraseFromParent();
1193       return Legalized;
1194     }
1195 
1196     if (DstTy.isScalar()) {
1197       unsigned TmpReg = MRI.createGenericVirtualRegister(
1198           LLT::scalar(MMO.getSize() /* in bytes */ * 8));
1199       MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1200       switch (MI.getOpcode()) {
1201       default:
1202         llvm_unreachable("Unexpected opcode");
1203       case TargetOpcode::G_LOAD:
1204         MIRBuilder.buildAnyExt(DstReg, TmpReg);
1205         break;
1206       case TargetOpcode::G_SEXTLOAD:
1207         MIRBuilder.buildSExt(DstReg, TmpReg);
1208         break;
1209       case TargetOpcode::G_ZEXTLOAD:
1210         MIRBuilder.buildZExt(DstReg, TmpReg);
1211         break;
1212       }
1213       MI.eraseFromParent();
1214       return Legalized;
1215     }
1216 
1217     return UnableToLegalize;
1218   }
1219   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1220   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1221   case TargetOpcode::G_CTLZ:
1222   case TargetOpcode::G_CTTZ:
1223   case TargetOpcode::G_CTPOP:
1224     return lowerBitCount(MI, TypeIdx, Ty);
1225   case G_UADDE: {
1226     unsigned Res = MI.getOperand(0).getReg();
1227     unsigned CarryOut = MI.getOperand(1).getReg();
1228     unsigned LHS = MI.getOperand(2).getReg();
1229     unsigned RHS = MI.getOperand(3).getReg();
1230     unsigned CarryIn = MI.getOperand(4).getReg();
1231 
1232     unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
1233     unsigned ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);
1234 
1235     MIRBuilder.buildAdd(TmpRes, LHS, RHS);
1236     MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
1237     MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
1238     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
1239 
1240     MI.eraseFromParent();
1241     return Legalized;
1242   }
1243   }
1244 }
1245 
1246 LegalizerHelper::LegalizeResult
1247 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
1248                                      LLT NarrowTy) {
1249   // FIXME: Don't know how to handle secondary types yet.
1250   if (TypeIdx != 0)
1251     return UnableToLegalize;
1252 
1253   MIRBuilder.setInstr(MI);
1254   unsigned Opc = MI.getOpcode();
1255   switch (Opc) {
1256   default:
1257     return UnableToLegalize;
1258   case TargetOpcode::G_IMPLICIT_DEF: {
1259     SmallVector<unsigned, 2> DstRegs;
1260 
1261     unsigned NarrowSize = NarrowTy.getSizeInBits();
1262     unsigned DstReg = MI.getOperand(0).getReg();
1263     unsigned Size = MRI.getType(DstReg).getSizeInBits();
1264     int NumParts = Size / NarrowSize;
1265     // FIXME: Don't know how to handle the situation where the small vectors
1266     // aren't all the same size yet.
1267     if (Size % NarrowSize != 0)
1268       return UnableToLegalize;
1269 
1270     for (int i = 0; i < NumParts; ++i) {
1271       unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1272       MIRBuilder.buildUndef(TmpReg);
1273       DstRegs.push_back(TmpReg);
1274     }
1275 
1276     if (NarrowTy.isVector())
1277       MIRBuilder.buildConcatVectors(DstReg, DstRegs);
1278     else
1279       MIRBuilder.buildBuildVector(DstReg, DstRegs);
1280 
1281     MI.eraseFromParent();
1282     return Legalized;
1283   }
1284   case TargetOpcode::G_ADD:
1285   case TargetOpcode::G_FADD:
1286   case TargetOpcode::G_FMUL:
1287   case TargetOpcode::G_FSUB:
1288   case TargetOpcode::G_FNEG:
1289   case TargetOpcode::G_FABS:
1290   case TargetOpcode::G_FDIV:
1291   case TargetOpcode::G_FREM:
1292   case TargetOpcode::G_FMA: {
1293     unsigned NarrowSize = NarrowTy.getSizeInBits();
1294     unsigned DstReg = MI.getOperand(0).getReg();
1295     unsigned Flags = MI.getFlags();
1296     unsigned Size = MRI.getType(DstReg).getSizeInBits();
1297     int NumParts = Size / NarrowSize;
1298     // FIXME: Don't know how to handle the situation where the small vectors
1299     // aren't all the same size yet.
1300     if (Size % NarrowSize != 0)
1301       return UnableToLegalize;
1302 
1303     unsigned NumOps = MI.getNumOperands() - 1;
1304     SmallVector<unsigned, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
1305 
1306     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs);
1307 
1308     if (NumOps >= 2)
1309       extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs);
1310 
1311     if (NumOps >= 3)
1312       extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs);
1313 
1314     for (int i = 0; i < NumParts; ++i) {
1315       unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
1316 
1317       if (NumOps == 1)
1318         MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags);
1319       else if (NumOps == 2) {
1320         MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags);
1321       } else if (NumOps == 3) {
1322         MIRBuilder.buildInstr(Opc, {DstReg},
1323                               {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags);
1324       }
1325 
1326       DstRegs.push_back(DstReg);
1327     }
1328 
1329     if (NarrowTy.isVector())
1330       MIRBuilder.buildConcatVectors(DstReg, DstRegs);
1331     else
1332       MIRBuilder.buildBuildVector(DstReg, DstRegs);
1333 
1334     MI.eraseFromParent();
1335     return Legalized;
1336   }
1337   case TargetOpcode::G_LOAD:
1338   case TargetOpcode::G_STORE: {
1339     bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
1340     unsigned ValReg = MI.getOperand(0).getReg();
1341     unsigned AddrReg = MI.getOperand(1).getReg();
1342     unsigned NarrowSize = NarrowTy.getSizeInBits();
1343     unsigned Size = MRI.getType(ValReg).getSizeInBits();
1344     unsigned NumParts = Size / NarrowSize;
1345 
1346     SmallVector<unsigned, 8> NarrowRegs;
1347     if (!IsLoad)
1348       extractParts(ValReg, NarrowTy, NumParts, NarrowRegs);
1349 
1350     const LLT OffsetTy =
1351         LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
1352     MachineFunction &MF = *MI.getMF();
1353     MachineMemOperand *MMO = *MI.memoperands_begin();
1354     for (unsigned Idx = 0; Idx < NumParts; ++Idx) {
1355       unsigned Adjustment = Idx * NarrowTy.getSizeInBits() / 8;
1356       unsigned Alignment = MinAlign(MMO->getAlignment(), Adjustment);
1357       unsigned NewAddrReg = 0;
1358       MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, Adjustment);
1359       MachineMemOperand &NewMMO = *MF.getMachineMemOperand(
1360           MMO->getPointerInfo().getWithOffset(Adjustment), MMO->getFlags(),
1361           NarrowTy.getSizeInBits() / 8, Alignment);
1362       if (IsLoad) {
1363         unsigned Dst = MRI.createGenericVirtualRegister(NarrowTy);
1364         NarrowRegs.push_back(Dst);
1365         MIRBuilder.buildLoad(Dst, NewAddrReg, NewMMO);
1366       } else {
1367         MIRBuilder.buildStore(NarrowRegs[Idx], NewAddrReg, NewMMO);
1368       }
1369     }
1370     if (IsLoad) {
1371       if (NarrowTy.isVector())
1372         MIRBuilder.buildConcatVectors(ValReg, NarrowRegs);
1373       else
1374         MIRBuilder.buildBuildVector(ValReg, NarrowRegs);
1375     }
1376     MI.eraseFromParent();
1377     return Legalized;
1378   }
1379   }
1380 }
1381 
1382 LegalizerHelper::LegalizeResult
1383 LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
1384   unsigned Opc = MI.getOpcode();
1385   auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
1386   auto isSupported = [this](const LegalityQuery &Q) {
1387     auto QAction = LI.getAction(Q).Action;
1388     return QAction == Legal || QAction == Libcall || QAction == Custom;
1389   };
1390   switch (Opc) {
1391   default:
1392     return UnableToLegalize;
1393   case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1394     // This trivially expands to CTLZ.
1395     Observer.changingInstr(MI);
1396     MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
1397     Observer.changedInstr(MI);
1398     return Legalized;
1399   }
1400   case TargetOpcode::G_CTLZ: {
1401     unsigned SrcReg = MI.getOperand(1).getReg();
1402     unsigned Len = Ty.getSizeInBits();
1403     if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty}})) {
1404       // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
1405       auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
1406                                              {Ty}, {SrcReg});
1407       auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
1408       auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
1409       auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
1410                                           SrcReg, MIBZero);
1411       MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
1412                              MIBCtlzZU);
1413       MI.eraseFromParent();
1414       return Legalized;
1415     }
1416     // for now, we do this:
1417     // NewLen = NextPowerOf2(Len);
1418     // x = x | (x >> 1);
1419     // x = x | (x >> 2);
1420     // ...
1421     // x = x | (x >>16);
1422     // x = x | (x >>32); // for 64-bit input
1423     // Upto NewLen/2
1424     // return Len - popcount(x);
1425     //
1426     // Ref: "Hacker's Delight" by Henry Warren
1427     unsigned Op = SrcReg;
1428     unsigned NewLen = PowerOf2Ceil(Len);
1429     for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
1430       auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
1431       auto MIBOp = MIRBuilder.buildInstr(
1432           TargetOpcode::G_OR, {Ty},
1433           {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty},
1434                                      {Op, MIBShiftAmt})});
1435       Op = MIBOp->getOperand(0).getReg();
1436     }
1437     auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op});
1438     MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
1439                           {MIRBuilder.buildConstant(Ty, Len), MIBPop});
1440     MI.eraseFromParent();
1441     return Legalized;
1442   }
1443   case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
1444     // This trivially expands to CTTZ.
1445     Observer.changingInstr(MI);
1446     MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
1447     Observer.changedInstr(MI);
1448     return Legalized;
1449   }
1450   case TargetOpcode::G_CTTZ: {
1451     unsigned SrcReg = MI.getOperand(1).getReg();
1452     unsigned Len = Ty.getSizeInBits();
1453     if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty}})) {
1454       // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
1455       // zero.
1456       auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
1457                                              {Ty}, {SrcReg});
1458       auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
1459       auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
1460       auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
1461                                           SrcReg, MIBZero);
1462       MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
1463                              MIBCttzZU);
1464       MI.eraseFromParent();
1465       return Legalized;
1466     }
1467     // for now, we use: { return popcount(~x & (x - 1)); }
1468     // unless the target has ctlz but not ctpop, in which case we use:
1469     // { return 32 - nlz(~x & (x-1)); }
1470     // Ref: "Hacker's Delight" by Henry Warren
1471     auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
1472     auto MIBNot =
1473         MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1});
1474     auto MIBTmp = MIRBuilder.buildInstr(
1475         TargetOpcode::G_AND, {Ty},
1476         {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
1477                                        {SrcReg, MIBCstNeg1})});
1478     if (!isSupported({TargetOpcode::G_CTPOP, {Ty}}) &&
1479         isSupported({TargetOpcode::G_CTLZ, {Ty}})) {
1480       auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
1481       MIRBuilder.buildInstr(
1482           TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
1483           {MIBCstLen,
1484            MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})});
1485       MI.eraseFromParent();
1486       return Legalized;
1487     }
1488     MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
1489     MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
1490     return Legalized;
1491   }
1492   }
1493 }
1494