1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file This file implements the LegalizerHelper class to legalize
11 /// individual instructions and the LegalizeMachineIR wrapper pass for the
12 /// primary legalization.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
17 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
18 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
19 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
20 #include "llvm/CodeGen/MachineRegisterInfo.h"
21 #include "llvm/CodeGen/TargetInstrInfo.h"
22 #include "llvm/CodeGen/TargetLowering.h"
23 #include "llvm/CodeGen/TargetSubtargetInfo.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
27 
28 #define DEBUG_TYPE "legalizer"
29 
30 using namespace llvm;
31 using namespace LegalizeActions;
32 
LegalizerHelper(MachineFunction & MF,GISelChangeObserver & Observer,MachineIRBuilder & Builder)33 LegalizerHelper::LegalizerHelper(MachineFunction &MF,
34                                  GISelChangeObserver &Observer,
35                                  MachineIRBuilder &Builder)
36     : MIRBuilder(Builder), MRI(MF.getRegInfo()),
37       LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
38   MIRBuilder.setMF(MF);
39   MIRBuilder.setChangeObserver(Observer);
40 }
41 
LegalizerHelper(MachineFunction & MF,const LegalizerInfo & LI,GISelChangeObserver & Observer,MachineIRBuilder & B)42 LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
43                                  GISelChangeObserver &Observer,
44                                  MachineIRBuilder &B)
45     : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
46   MIRBuilder.setMF(MF);
47   MIRBuilder.setChangeObserver(Observer);
48 }
49 LegalizerHelper::LegalizeResult
legalizeInstrStep(MachineInstr & MI)50 LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
51   LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));
52 
53   auto Step = LI.getAction(MI, MRI);
54   switch (Step.Action) {
55   case Legal:
56     LLVM_DEBUG(dbgs() << ".. Already legal\n");
57     return AlreadyLegal;
58   case Libcall:
59     LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
60     return libcall(MI);
61   case NarrowScalar:
62     LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
63     return narrowScalar(MI, Step.TypeIdx, Step.NewType);
64   case WidenScalar:
65     LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
66     return widenScalar(MI, Step.TypeIdx, Step.NewType);
67   case Lower:
68     LLVM_DEBUG(dbgs() << ".. Lower\n");
69     return lower(MI, Step.TypeIdx, Step.NewType);
70   case FewerElements:
71     LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
72     return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
73   case Custom:
74     LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
75     return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
76                                                             : UnableToLegalize;
77   default:
78     LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
79     return UnableToLegalize;
80   }
81 }
82 
extractParts(unsigned Reg,LLT Ty,int NumParts,SmallVectorImpl<unsigned> & VRegs)83 void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts,
84                                    SmallVectorImpl<unsigned> &VRegs) {
85   for (int i = 0; i < NumParts; ++i)
86     VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
87   MIRBuilder.buildUnmerge(VRegs, Reg);
88 }
89 
getRTLibDesc(unsigned Opcode,unsigned Size)90 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
91   switch (Opcode) {
92   case TargetOpcode::G_SDIV:
93     assert((Size == 32 || Size == 64) && "Unsupported size");
94     return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32;
95   case TargetOpcode::G_UDIV:
96     assert((Size == 32 || Size == 64) && "Unsupported size");
97     return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32;
98   case TargetOpcode::G_SREM:
99     assert((Size == 32 || Size == 64) && "Unsupported size");
100     return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
101   case TargetOpcode::G_UREM:
102     assert((Size == 32 || Size == 64) && "Unsupported size");
103     return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32;
104   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
105     assert(Size == 32 && "Unsupported size");
106     return RTLIB::CTLZ_I32;
107   case TargetOpcode::G_FADD:
108     assert((Size == 32 || Size == 64) && "Unsupported size");
109     return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
110   case TargetOpcode::G_FSUB:
111     assert((Size == 32 || Size == 64) && "Unsupported size");
112     return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
113   case TargetOpcode::G_FMUL:
114     assert((Size == 32 || Size == 64) && "Unsupported size");
115     return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
116   case TargetOpcode::G_FDIV:
117     assert((Size == 32 || Size == 64) && "Unsupported size");
118     return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
119   case TargetOpcode::G_FREM:
120     return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
121   case TargetOpcode::G_FPOW:
122     return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
123   case TargetOpcode::G_FMA:
124     assert((Size == 32 || Size == 64) && "Unsupported size");
125     return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
126   }
127   llvm_unreachable("Unknown libcall function");
128 }
129 
130 LegalizerHelper::LegalizeResult
createLibcall(MachineIRBuilder & MIRBuilder,RTLIB::Libcall Libcall,const CallLowering::ArgInfo & Result,ArrayRef<CallLowering::ArgInfo> Args)131 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
132                     const CallLowering::ArgInfo &Result,
133                     ArrayRef<CallLowering::ArgInfo> Args) {
134   auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
135   auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
136   const char *Name = TLI.getLibcallName(Libcall);
137 
138   MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
139   if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall),
140                      MachineOperand::CreateES(Name), Result, Args))
141     return LegalizerHelper::UnableToLegalize;
142 
143   return LegalizerHelper::Legalized;
144 }
145 
146 // Useful for libcalls where all operands have the same type.
147 static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr & MI,MachineIRBuilder & MIRBuilder,unsigned Size,Type * OpType)148 simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
149               Type *OpType) {
150   auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
151 
152   SmallVector<CallLowering::ArgInfo, 3> Args;
153   for (unsigned i = 1; i < MI.getNumOperands(); i++)
154     Args.push_back({MI.getOperand(i).getReg(), OpType});
155   return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
156                        Args);
157 }
158 
getConvRTLibDesc(unsigned Opcode,Type * ToType,Type * FromType)159 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
160                                        Type *FromType) {
161   auto ToMVT = MVT::getVT(ToType);
162   auto FromMVT = MVT::getVT(FromType);
163 
164   switch (Opcode) {
165   case TargetOpcode::G_FPEXT:
166     return RTLIB::getFPEXT(FromMVT, ToMVT);
167   case TargetOpcode::G_FPTRUNC:
168     return RTLIB::getFPROUND(FromMVT, ToMVT);
169   case TargetOpcode::G_FPTOSI:
170     return RTLIB::getFPTOSINT(FromMVT, ToMVT);
171   case TargetOpcode::G_FPTOUI:
172     return RTLIB::getFPTOUINT(FromMVT, ToMVT);
173   case TargetOpcode::G_SITOFP:
174     return RTLIB::getSINTTOFP(FromMVT, ToMVT);
175   case TargetOpcode::G_UITOFP:
176     return RTLIB::getUINTTOFP(FromMVT, ToMVT);
177   }
178   llvm_unreachable("Unsupported libcall function");
179 }
180 
181 static LegalizerHelper::LegalizeResult
conversionLibcall(MachineInstr & MI,MachineIRBuilder & MIRBuilder,Type * ToType,Type * FromType)182 conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
183                   Type *FromType) {
184   RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
185   return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
186                        {{MI.getOperand(1).getReg(), FromType}});
187 }
188 
189 LegalizerHelper::LegalizeResult
libcall(MachineInstr & MI)190 LegalizerHelper::libcall(MachineInstr &MI) {
191   LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
192   unsigned Size = LLTy.getSizeInBits();
193   auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
194 
195   MIRBuilder.setInstr(MI);
196 
197   switch (MI.getOpcode()) {
198   default:
199     return UnableToLegalize;
200   case TargetOpcode::G_SDIV:
201   case TargetOpcode::G_UDIV:
202   case TargetOpcode::G_SREM:
203   case TargetOpcode::G_UREM:
204   case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
205     Type *HLTy = IntegerType::get(Ctx, Size);
206     auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
207     if (Status != Legalized)
208       return Status;
209     break;
210   }
211   case TargetOpcode::G_FADD:
212   case TargetOpcode::G_FSUB:
213   case TargetOpcode::G_FMUL:
214   case TargetOpcode::G_FDIV:
215   case TargetOpcode::G_FMA:
216   case TargetOpcode::G_FPOW:
217   case TargetOpcode::G_FREM: {
218     Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
219     auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
220     if (Status != Legalized)
221       return Status;
222     break;
223   }
224   case TargetOpcode::G_FPEXT: {
225     // FIXME: Support other floating point types (half, fp128 etc)
226     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
227     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
228     if (ToSize != 64 || FromSize != 32)
229       return UnableToLegalize;
230     LegalizeResult Status = conversionLibcall(
231         MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx));
232     if (Status != Legalized)
233       return Status;
234     break;
235   }
236   case TargetOpcode::G_FPTRUNC: {
237     // FIXME: Support other floating point types (half, fp128 etc)
238     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
239     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
240     if (ToSize != 32 || FromSize != 64)
241       return UnableToLegalize;
242     LegalizeResult Status = conversionLibcall(
243         MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx));
244     if (Status != Legalized)
245       return Status;
246     break;
247   }
248   case TargetOpcode::G_FPTOSI:
249   case TargetOpcode::G_FPTOUI: {
250     // FIXME: Support other types
251     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
252     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
253     if (ToSize != 32 || (FromSize != 32 && FromSize != 64))
254       return UnableToLegalize;
255     LegalizeResult Status = conversionLibcall(
256         MI, MIRBuilder, Type::getInt32Ty(Ctx),
257         FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
258     if (Status != Legalized)
259       return Status;
260     break;
261   }
262   case TargetOpcode::G_SITOFP:
263   case TargetOpcode::G_UITOFP: {
264     // FIXME: Support other types
265     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
266     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
267     if (FromSize != 32 || (ToSize != 32 && ToSize != 64))
268       return UnableToLegalize;
269     LegalizeResult Status = conversionLibcall(
270         MI, MIRBuilder,
271         ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
272         Type::getInt32Ty(Ctx));
273     if (Status != Legalized)
274       return Status;
275     break;
276   }
277   }
278 
279   MI.eraseFromParent();
280   return Legalized;
281 }
282 
narrowScalar(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)283 LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
284                                                               unsigned TypeIdx,
285                                                               LLT NarrowTy) {
286   // FIXME: Don't know how to handle secondary types yet.
287   if (TypeIdx != 0 && MI.getOpcode() != TargetOpcode::G_EXTRACT)
288     return UnableToLegalize;
289 
290   MIRBuilder.setInstr(MI);
291 
292   uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
293   uint64_t NarrowSize = NarrowTy.getSizeInBits();
294 
295   switch (MI.getOpcode()) {
296   default:
297     return UnableToLegalize;
298   case TargetOpcode::G_IMPLICIT_DEF: {
299     // FIXME: add support for when SizeOp0 isn't an exact multiple of
300     // NarrowSize.
301     if (SizeOp0 % NarrowSize != 0)
302       return UnableToLegalize;
303     int NumParts = SizeOp0 / NarrowSize;
304 
305     SmallVector<unsigned, 2> DstRegs;
306     for (int i = 0; i < NumParts; ++i)
307       DstRegs.push_back(
308           MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());
309 
310     unsigned DstReg = MI.getOperand(0).getReg();
311     if(MRI.getType(DstReg).isVector())
312       MIRBuilder.buildBuildVector(DstReg, DstRegs);
313     else
314       MIRBuilder.buildMerge(DstReg, DstRegs);
315     MI.eraseFromParent();
316     return Legalized;
317   }
318   case TargetOpcode::G_ADD: {
319     // FIXME: add support for when SizeOp0 isn't an exact multiple of
320     // NarrowSize.
321     if (SizeOp0 % NarrowSize != 0)
322       return UnableToLegalize;
323     // Expand in terms of carry-setting/consuming G_ADDE instructions.
324     int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
325 
326     SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
327     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
328     extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
329 
330     unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
331     MIRBuilder.buildConstant(CarryIn, 0);
332 
333     for (int i = 0; i < NumParts; ++i) {
334       unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
335       unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
336 
337       MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
338                             Src2Regs[i], CarryIn);
339 
340       DstRegs.push_back(DstReg);
341       CarryIn = CarryOut;
342     }
343     unsigned DstReg = MI.getOperand(0).getReg();
344     if(MRI.getType(DstReg).isVector())
345       MIRBuilder.buildBuildVector(DstReg, DstRegs);
346     else
347       MIRBuilder.buildMerge(DstReg, DstRegs);
348     MI.eraseFromParent();
349     return Legalized;
350   }
351   case TargetOpcode::G_EXTRACT: {
352     if (TypeIdx != 1)
353       return UnableToLegalize;
354 
355     int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
356     // FIXME: add support for when SizeOp1 isn't an exact multiple of
357     // NarrowSize.
358     if (SizeOp1 % NarrowSize != 0)
359       return UnableToLegalize;
360     int NumParts = SizeOp1 / NarrowSize;
361 
362     SmallVector<unsigned, 2> SrcRegs, DstRegs;
363     SmallVector<uint64_t, 2> Indexes;
364     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
365 
366     unsigned OpReg = MI.getOperand(0).getReg();
367     uint64_t OpStart = MI.getOperand(2).getImm();
368     uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
369     for (int i = 0; i < NumParts; ++i) {
370       unsigned SrcStart = i * NarrowSize;
371 
372       if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
373         // No part of the extract uses this subregister, ignore it.
374         continue;
375       } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
376         // The entire subregister is extracted, forward the value.
377         DstRegs.push_back(SrcRegs[i]);
378         continue;
379       }
380 
381       // OpSegStart is where this destination segment would start in OpReg if it
382       // extended infinitely in both directions.
383       int64_t ExtractOffset;
384       uint64_t SegSize;
385       if (OpStart < SrcStart) {
386         ExtractOffset = 0;
387         SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
388       } else {
389         ExtractOffset = OpStart - SrcStart;
390         SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
391       }
392 
393       unsigned SegReg = SrcRegs[i];
394       if (ExtractOffset != 0 || SegSize != NarrowSize) {
395         // A genuine extract is needed.
396         SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
397         MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
398       }
399 
400       DstRegs.push_back(SegReg);
401     }
402 
403     unsigned DstReg = MI.getOperand(0).getReg();
404     if(MRI.getType(DstReg).isVector())
405       MIRBuilder.buildBuildVector(DstReg, DstRegs);
406     else
407       MIRBuilder.buildMerge(DstReg, DstRegs);
408     MI.eraseFromParent();
409     return Legalized;
410   }
411   case TargetOpcode::G_INSERT: {
412     // FIXME: add support for when SizeOp0 isn't an exact multiple of
413     // NarrowSize.
414     if (SizeOp0 % NarrowSize != 0)
415       return UnableToLegalize;
416 
417     int NumParts = SizeOp0 / NarrowSize;
418 
419     SmallVector<unsigned, 2> SrcRegs, DstRegs;
420     SmallVector<uint64_t, 2> Indexes;
421     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
422 
423     unsigned OpReg = MI.getOperand(2).getReg();
424     uint64_t OpStart = MI.getOperand(3).getImm();
425     uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
426     for (int i = 0; i < NumParts; ++i) {
427       unsigned DstStart = i * NarrowSize;
428 
429       if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
430         // No part of the insert affects this subregister, forward the original.
431         DstRegs.push_back(SrcRegs[i]);
432         continue;
433       } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
434         // The entire subregister is defined by this insert, forward the new
435         // value.
436         DstRegs.push_back(OpReg);
437         continue;
438       }
439 
440       // OpSegStart is where this destination segment would start in OpReg if it
441       // extended infinitely in both directions.
442       int64_t ExtractOffset, InsertOffset;
443       uint64_t SegSize;
444       if (OpStart < DstStart) {
445         InsertOffset = 0;
446         ExtractOffset = DstStart - OpStart;
447         SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
448       } else {
449         InsertOffset = OpStart - DstStart;
450         ExtractOffset = 0;
451         SegSize =
452             std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
453       }
454 
455       unsigned SegReg = OpReg;
456       if (ExtractOffset != 0 || SegSize != OpSize) {
457         // A genuine extract is needed.
458         SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
459         MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
460       }
461 
462       unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
463       MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
464       DstRegs.push_back(DstReg);
465     }
466 
467     assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
468     unsigned DstReg = MI.getOperand(0).getReg();
469     if(MRI.getType(DstReg).isVector())
470       MIRBuilder.buildBuildVector(DstReg, DstRegs);
471     else
472       MIRBuilder.buildMerge(DstReg, DstRegs);
473     MI.eraseFromParent();
474     return Legalized;
475   }
476   case TargetOpcode::G_LOAD: {
477     // FIXME: add support for when SizeOp0 isn't an exact multiple of
478     // NarrowSize.
479     if (SizeOp0 % NarrowSize != 0)
480       return UnableToLegalize;
481 
482     const auto &MMO = **MI.memoperands_begin();
483     // This implementation doesn't work for atomics. Give up instead of doing
484     // something invalid.
485     if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
486         MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
487       return UnableToLegalize;
488 
489     int NumParts = SizeOp0 / NarrowSize;
490     LLT OffsetTy = LLT::scalar(
491         MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());
492 
493     SmallVector<unsigned, 2> DstRegs;
494     for (int i = 0; i < NumParts; ++i) {
495       unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
496       unsigned SrcReg = 0;
497       unsigned Adjustment = i * NarrowSize / 8;
498       unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);
499 
500       MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
501           MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
502           NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
503           MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());
504 
505       MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy,
506                                 Adjustment);
507 
508       MIRBuilder.buildLoad(DstReg, SrcReg, *SplitMMO);
509 
510       DstRegs.push_back(DstReg);
511     }
512     unsigned DstReg = MI.getOperand(0).getReg();
513     if(MRI.getType(DstReg).isVector())
514       MIRBuilder.buildBuildVector(DstReg, DstRegs);
515     else
516       MIRBuilder.buildMerge(DstReg, DstRegs);
517     MI.eraseFromParent();
518     return Legalized;
519   }
520   case TargetOpcode::G_STORE: {
521     // FIXME: add support for when SizeOp0 isn't an exact multiple of
522     // NarrowSize.
523     if (SizeOp0 % NarrowSize != 0)
524       return UnableToLegalize;
525 
526     const auto &MMO = **MI.memoperands_begin();
527     // This implementation doesn't work for atomics. Give up instead of doing
528     // something invalid.
529     if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
530         MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
531       return UnableToLegalize;
532 
533     int NumParts = SizeOp0 / NarrowSize;
534     LLT OffsetTy = LLT::scalar(
535         MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());
536 
537     SmallVector<unsigned, 2> SrcRegs;
538     extractParts(MI.getOperand(0).getReg(), NarrowTy, NumParts, SrcRegs);
539 
540     for (int i = 0; i < NumParts; ++i) {
541       unsigned DstReg = 0;
542       unsigned Adjustment = i * NarrowSize / 8;
543       unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);
544 
545       MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
546           MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
547           NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
548           MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());
549 
550       MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy,
551                                 Adjustment);
552 
553       MIRBuilder.buildStore(SrcRegs[i], DstReg, *SplitMMO);
554     }
555     MI.eraseFromParent();
556     return Legalized;
557   }
558   case TargetOpcode::G_CONSTANT: {
559     // FIXME: add support for when SizeOp0 isn't an exact multiple of
560     // NarrowSize.
561     if (SizeOp0 % NarrowSize != 0)
562       return UnableToLegalize;
563     int NumParts = SizeOp0 / NarrowSize;
564     const APInt &Cst = MI.getOperand(1).getCImm()->getValue();
565     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
566 
567     SmallVector<unsigned, 2> DstRegs;
568     for (int i = 0; i < NumParts; ++i) {
569       unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
570       ConstantInt *CI =
571           ConstantInt::get(Ctx, Cst.lshr(NarrowSize * i).trunc(NarrowSize));
572       MIRBuilder.buildConstant(DstReg, *CI);
573       DstRegs.push_back(DstReg);
574     }
575     unsigned DstReg = MI.getOperand(0).getReg();
576     if(MRI.getType(DstReg).isVector())
577       MIRBuilder.buildBuildVector(DstReg, DstRegs);
578     else
579       MIRBuilder.buildMerge(DstReg, DstRegs);
580     MI.eraseFromParent();
581     return Legalized;
582   }
583   case TargetOpcode::G_AND:
584   case TargetOpcode::G_OR:
585   case TargetOpcode::G_XOR: {
586     // Legalize bitwise operation:
587     // A = BinOp<Ty> B, C
588     // into:
589     // B1, ..., BN = G_UNMERGE_VALUES B
590     // C1, ..., CN = G_UNMERGE_VALUES C
591     // A1 = BinOp<Ty/N> B1, C2
592     // ...
593     // AN = BinOp<Ty/N> BN, CN
594     // A = G_MERGE_VALUES A1, ..., AN
595 
596     // FIXME: add support for when SizeOp0 isn't an exact multiple of
597     // NarrowSize.
598     if (SizeOp0 % NarrowSize != 0)
599       return UnableToLegalize;
600     int NumParts = SizeOp0 / NarrowSize;
601 
602     // List the registers where the destination will be scattered.
603     SmallVector<unsigned, 2> DstRegs;
604     // List the registers where the first argument will be split.
605     SmallVector<unsigned, 2> SrcsReg1;
606     // List the registers where the second argument will be split.
607     SmallVector<unsigned, 2> SrcsReg2;
608     // Create all the temporary registers.
609     for (int i = 0; i < NumParts; ++i) {
610       unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
611       unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy);
612       unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy);
613 
614       DstRegs.push_back(DstReg);
615       SrcsReg1.push_back(SrcReg1);
616       SrcsReg2.push_back(SrcReg2);
617     }
618     // Explode the big arguments into smaller chunks.
619     MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg());
620     MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg());
621 
622     // Do the operation on each small part.
623     for (int i = 0; i < NumParts; ++i)
624       MIRBuilder.buildInstr(MI.getOpcode(), {DstRegs[i]},
625                             {SrcsReg1[i], SrcsReg2[i]});
626 
627     // Gather the destination registers into the final destination.
628     unsigned DstReg = MI.getOperand(0).getReg();
629     if(MRI.getType(DstReg).isVector())
630       MIRBuilder.buildBuildVector(DstReg, DstRegs);
631     else
632       MIRBuilder.buildMerge(DstReg, DstRegs);
633     MI.eraseFromParent();
634     return Legalized;
635   }
636   }
637 }
638 
widenScalarSrc(MachineInstr & MI,LLT WideTy,unsigned OpIdx,unsigned ExtOpcode)639 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
640                                      unsigned OpIdx, unsigned ExtOpcode) {
641   MachineOperand &MO = MI.getOperand(OpIdx);
642   auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()});
643   MO.setReg(ExtB->getOperand(0).getReg());
644 }
645 
widenScalarDst(MachineInstr & MI,LLT WideTy,unsigned OpIdx,unsigned TruncOpcode)646 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
647                                      unsigned OpIdx, unsigned TruncOpcode) {
648   MachineOperand &MO = MI.getOperand(OpIdx);
649   unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
650   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
651   MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt});
652   MO.setReg(DstExt);
653 }
654 
655 LegalizerHelper::LegalizeResult
widenScalar(MachineInstr & MI,unsigned TypeIdx,LLT WideTy)656 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
657   MIRBuilder.setInstr(MI);
658 
659   switch (MI.getOpcode()) {
660   default:
661     return UnableToLegalize;
662   case TargetOpcode::G_UADDO:
663   case TargetOpcode::G_USUBO: {
664     if (TypeIdx == 1)
665       return UnableToLegalize; // TODO
666     auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
667                                          {MI.getOperand(2).getReg()});
668     auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
669                                          {MI.getOperand(3).getReg()});
670     unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
671                           ? TargetOpcode::G_ADD
672                           : TargetOpcode::G_SUB;
673     // Do the arithmetic in the larger type.
674     auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
675     LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
676     APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits());
677     auto AndOp = MIRBuilder.buildInstr(
678         TargetOpcode::G_AND, {WideTy},
679         {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
680     // There is no overflow if the AndOp is the same as NewOp.
681     MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp,
682                          AndOp);
683     // Now trunc the NewOp to the original result.
684     MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
685     MI.eraseFromParent();
686     return Legalized;
687   }
688   case TargetOpcode::G_CTTZ:
689   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
690   case TargetOpcode::G_CTLZ:
691   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
692   case TargetOpcode::G_CTPOP: {
693     // First ZEXT the input.
694     auto MIBSrc = MIRBuilder.buildZExt(WideTy, MI.getOperand(1).getReg());
695     LLT CurTy = MRI.getType(MI.getOperand(0).getReg());
696     if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
697       // The count is the same in the larger type except if the original
698       // value was zero.  This can be handled by setting the bit just off
699       // the top of the original type.
700       auto TopBit =
701           APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
702       MIBSrc = MIRBuilder.buildInstr(
703           TargetOpcode::G_OR, {WideTy},
704           {MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit.getSExtValue())});
705     }
706     // Perform the operation at the larger size.
707     auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
708     // This is already the correct result for CTPOP and CTTZs
709     if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
710         MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
711       // The correct result is NewOp - (Difference in widety and current ty).
712       unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
713       MIBNewOp = MIRBuilder.buildInstr(
714           TargetOpcode::G_SUB, {WideTy},
715           {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
716     }
717     auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
718     // Make the original instruction a trunc now, and update its source.
719     Observer.changingInstr(MI);
720     MI.setDesc(TII.get(TargetOpcode::G_TRUNC));
721     MI.getOperand(1).setReg(MIBNewOp->getOperand(0).getReg());
722     Observer.changedInstr(MI);
723     return Legalized;
724   }
725 
726   case TargetOpcode::G_ADD:
727   case TargetOpcode::G_AND:
728   case TargetOpcode::G_MUL:
729   case TargetOpcode::G_OR:
730   case TargetOpcode::G_XOR:
731   case TargetOpcode::G_SUB:
732     // Perform operation at larger width (any extension is fine here, high bits
733     // don't affect the result) and then truncate the result back to the
734     // original type.
735     Observer.changingInstr(MI);
736     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
737     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
738     widenScalarDst(MI, WideTy);
739     Observer.changedInstr(MI);
740     return Legalized;
741 
742   case TargetOpcode::G_SHL:
743     Observer.changingInstr(MI);
744     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
745     // The "number of bits to shift" operand must preserve its value as an
746     // unsigned integer:
747     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
748     widenScalarDst(MI, WideTy);
749     Observer.changedInstr(MI);
750     return Legalized;
751 
752   case TargetOpcode::G_SDIV:
753   case TargetOpcode::G_SREM:
754     Observer.changingInstr(MI);
755     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
756     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
757     widenScalarDst(MI, WideTy);
758     Observer.changedInstr(MI);
759     return Legalized;
760 
761   case TargetOpcode::G_ASHR:
762     Observer.changingInstr(MI);
763     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
764     // The "number of bits to shift" operand must preserve its value as an
765     // unsigned integer:
766     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
767     widenScalarDst(MI, WideTy);
768     Observer.changedInstr(MI);
769     return Legalized;
770 
771   case TargetOpcode::G_UDIV:
772   case TargetOpcode::G_UREM:
773   case TargetOpcode::G_LSHR:
774     Observer.changingInstr(MI);
775     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
776     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
777     widenScalarDst(MI, WideTy);
778     Observer.changedInstr(MI);
779     return Legalized;
780 
781   case TargetOpcode::G_SELECT:
782     Observer.changingInstr(MI);
783     if (TypeIdx == 0) {
784       // Perform operation at larger width (any extension is fine here, high
785       // bits don't affect the result) and then truncate the result back to the
786       // original type.
787       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
788       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
789       widenScalarDst(MI, WideTy);
790     } else {
791       // Explicit extension is required here since high bits affect the result.
792       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
793     }
794     Observer.changedInstr(MI);
795     return Legalized;
796 
797   case TargetOpcode::G_FPTOSI:
798   case TargetOpcode::G_FPTOUI:
799     if (TypeIdx != 0)
800       return UnableToLegalize;
801     Observer.changingInstr(MI);
802     widenScalarDst(MI, WideTy);
803     Observer.changedInstr(MI);
804     return Legalized;
805 
806   case TargetOpcode::G_SITOFP:
807     if (TypeIdx != 1)
808       return UnableToLegalize;
809     Observer.changingInstr(MI);
810     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
811     Observer.changedInstr(MI);
812     return Legalized;
813 
814   case TargetOpcode::G_UITOFP:
815     if (TypeIdx != 1)
816       return UnableToLegalize;
817     Observer.changingInstr(MI);
818     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
819     Observer.changedInstr(MI);
820     return Legalized;
821 
822   case TargetOpcode::G_INSERT:
823     if (TypeIdx != 0)
824       return UnableToLegalize;
825     Observer.changingInstr(MI);
826     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
827     widenScalarDst(MI, WideTy);
828     Observer.changedInstr(MI);
829     return Legalized;
830 
831   case TargetOpcode::G_LOAD:
832     // For some types like i24, we might try to widen to i32. To properly handle
833     // this we should be using a dedicated extending load, until then avoid
834     // trying to legalize.
835     if (alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) !=
836         WideTy.getSizeInBits())
837       return UnableToLegalize;
838     LLVM_FALLTHROUGH;
839   case TargetOpcode::G_SEXTLOAD:
840   case TargetOpcode::G_ZEXTLOAD:
841     Observer.changingInstr(MI);
842     widenScalarDst(MI, WideTy);
843     Observer.changedInstr(MI);
844     return Legalized;
845 
846   case TargetOpcode::G_STORE: {
847     if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(1) ||
848         WideTy != LLT::scalar(8))
849       return UnableToLegalize;
850 
851     Observer.changingInstr(MI);
852     widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ZEXT);
853     Observer.changedInstr(MI);
854     return Legalized;
855   }
856   case TargetOpcode::G_CONSTANT: {
857     MachineOperand &SrcMO = MI.getOperand(1);
858     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
859     const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
860     Observer.changingInstr(MI);
861     SrcMO.setCImm(ConstantInt::get(Ctx, Val));
862 
863     widenScalarDst(MI, WideTy);
864     Observer.changedInstr(MI);
865     return Legalized;
866   }
867   case TargetOpcode::G_FCONSTANT: {
868     MachineOperand &SrcMO = MI.getOperand(1);
869     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
870     APFloat Val = SrcMO.getFPImm()->getValueAPF();
871     bool LosesInfo;
872     switch (WideTy.getSizeInBits()) {
873     case 32:
874       Val.convert(APFloat::IEEEsingle(), APFloat::rmTowardZero, &LosesInfo);
875       break;
876     case 64:
877       Val.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &LosesInfo);
878       break;
879     default:
880       llvm_unreachable("Unhandled fp widen type");
881     }
882     Observer.changingInstr(MI);
883     SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
884 
885     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
886     Observer.changedInstr(MI);
887     return Legalized;
888   }
889   case TargetOpcode::G_IMPLICIT_DEF: {
890     Observer.changingInstr(MI);
891     widenScalarDst(MI, WideTy);
892     Observer.changedInstr(MI);
893     return Legalized;
894   }
895   case TargetOpcode::G_BRCOND:
896     Observer.changingInstr(MI);
897     widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
898     Observer.changedInstr(MI);
899     return Legalized;
900 
901   case TargetOpcode::G_FCMP:
902     Observer.changingInstr(MI);
903     if (TypeIdx == 0)
904       widenScalarDst(MI, WideTy);
905     else {
906       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
907       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
908     }
909     Observer.changedInstr(MI);
910     return Legalized;
911 
912   case TargetOpcode::G_ICMP:
913     Observer.changingInstr(MI);
914     if (TypeIdx == 0)
915       widenScalarDst(MI, WideTy);
916     else {
917       unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
918                                MI.getOperand(1).getPredicate()))
919                                ? TargetOpcode::G_SEXT
920                                : TargetOpcode::G_ZEXT;
921       widenScalarSrc(MI, WideTy, 2, ExtOpcode);
922       widenScalarSrc(MI, WideTy, 3, ExtOpcode);
923     }
924     Observer.changedInstr(MI);
925     return Legalized;
926 
927   case TargetOpcode::G_GEP:
928     assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
929     Observer.changingInstr(MI);
930     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
931     Observer.changedInstr(MI);
932     return Legalized;
933 
934   case TargetOpcode::G_PHI: {
935     assert(TypeIdx == 0 && "Expecting only Idx 0");
936 
937     Observer.changingInstr(MI);
938     for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
939       MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
940       MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
941       widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
942     }
943 
944     MachineBasicBlock &MBB = *MI.getParent();
945     MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
946     widenScalarDst(MI, WideTy);
947     Observer.changedInstr(MI);
948     return Legalized;
949   }
950   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
951     if (TypeIdx != 2)
952       return UnableToLegalize;
953     Observer.changingInstr(MI);
954     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
955     Observer.changedInstr(MI);
956     return Legalized;
957 
958   case TargetOpcode::G_FCEIL:
959     if (TypeIdx != 0)
960       return UnableToLegalize;
961     Observer.changingInstr(MI);
962     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
963     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
964     Observer.changedInstr(MI);
965     return Legalized;
966   }
967 }
968 
969 LegalizerHelper::LegalizeResult
lower(MachineInstr & MI,unsigned TypeIdx,LLT Ty)970 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
971   using namespace TargetOpcode;
972   MIRBuilder.setInstr(MI);
973 
974   switch(MI.getOpcode()) {
975   default:
976     return UnableToLegalize;
977   case TargetOpcode::G_SREM:
978   case TargetOpcode::G_UREM: {
979     unsigned QuotReg = MRI.createGenericVirtualRegister(Ty);
980     MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
981         .addDef(QuotReg)
982         .addUse(MI.getOperand(1).getReg())
983         .addUse(MI.getOperand(2).getReg());
984 
985     unsigned ProdReg = MRI.createGenericVirtualRegister(Ty);
986     MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
987     MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
988                         ProdReg);
989     MI.eraseFromParent();
990     return Legalized;
991   }
992   case TargetOpcode::G_SMULO:
993   case TargetOpcode::G_UMULO: {
994     // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
995     // result.
996     unsigned Res = MI.getOperand(0).getReg();
997     unsigned Overflow = MI.getOperand(1).getReg();
998     unsigned LHS = MI.getOperand(2).getReg();
999     unsigned RHS = MI.getOperand(3).getReg();
1000 
1001     MIRBuilder.buildMul(Res, LHS, RHS);
1002 
1003     unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
1004                           ? TargetOpcode::G_SMULH
1005                           : TargetOpcode::G_UMULH;
1006 
1007     unsigned HiPart = MRI.createGenericVirtualRegister(Ty);
1008     MIRBuilder.buildInstr(Opcode)
1009       .addDef(HiPart)
1010       .addUse(LHS)
1011       .addUse(RHS);
1012 
1013     unsigned Zero = MRI.createGenericVirtualRegister(Ty);
1014     MIRBuilder.buildConstant(Zero, 0);
1015 
1016     // For *signed* multiply, overflow is detected by checking:
1017     // (hi != (lo >> bitwidth-1))
1018     if (Opcode == TargetOpcode::G_SMULH) {
1019       unsigned Shifted = MRI.createGenericVirtualRegister(Ty);
1020       unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty);
1021       MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
1022       MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
1023         .addDef(Shifted)
1024         .addUse(Res)
1025         .addUse(ShiftAmt);
1026       MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
1027     } else {
1028       MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
1029     }
1030     MI.eraseFromParent();
1031     return Legalized;
1032   }
1033   case TargetOpcode::G_FNEG: {
1034     // TODO: Handle vector types once we are able to
1035     // represent them.
1036     if (Ty.isVector())
1037       return UnableToLegalize;
1038     unsigned Res = MI.getOperand(0).getReg();
1039     Type *ZeroTy;
1040     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
1041     switch (Ty.getSizeInBits()) {
1042     case 16:
1043       ZeroTy = Type::getHalfTy(Ctx);
1044       break;
1045     case 32:
1046       ZeroTy = Type::getFloatTy(Ctx);
1047       break;
1048     case 64:
1049       ZeroTy = Type::getDoubleTy(Ctx);
1050       break;
1051     case 128:
1052       ZeroTy = Type::getFP128Ty(Ctx);
1053       break;
1054     default:
1055       llvm_unreachable("unexpected floating-point type");
1056     }
1057     ConstantFP &ZeroForNegation =
1058         *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
1059     auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
1060     MIRBuilder.buildInstr(TargetOpcode::G_FSUB)
1061         .addDef(Res)
1062         .addUse(Zero->getOperand(0).getReg())
1063         .addUse(MI.getOperand(1).getReg());
1064     MI.eraseFromParent();
1065     return Legalized;
1066   }
1067   case TargetOpcode::G_FSUB: {
1068     // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
1069     // First, check if G_FNEG is marked as Lower. If so, we may
1070     // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
1071     if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
1072       return UnableToLegalize;
1073     unsigned Res = MI.getOperand(0).getReg();
1074     unsigned LHS = MI.getOperand(1).getReg();
1075     unsigned RHS = MI.getOperand(2).getReg();
1076     unsigned Neg = MRI.createGenericVirtualRegister(Ty);
1077     MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
1078     MIRBuilder.buildInstr(TargetOpcode::G_FADD)
1079         .addDef(Res)
1080         .addUse(LHS)
1081         .addUse(Neg);
1082     MI.eraseFromParent();
1083     return Legalized;
1084   }
1085   case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1086     unsigned OldValRes = MI.getOperand(0).getReg();
1087     unsigned SuccessRes = MI.getOperand(1).getReg();
1088     unsigned Addr = MI.getOperand(2).getReg();
1089     unsigned CmpVal = MI.getOperand(3).getReg();
1090     unsigned NewVal = MI.getOperand(4).getReg();
1091     MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
1092                                   **MI.memoperands_begin());
1093     MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
1094     MI.eraseFromParent();
1095     return Legalized;
1096   }
1097   case TargetOpcode::G_LOAD:
1098   case TargetOpcode::G_SEXTLOAD:
1099   case TargetOpcode::G_ZEXTLOAD: {
1100     // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
1101     unsigned DstReg = MI.getOperand(0).getReg();
1102     unsigned PtrReg = MI.getOperand(1).getReg();
1103     LLT DstTy = MRI.getType(DstReg);
1104     auto &MMO = **MI.memoperands_begin();
1105 
1106     if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) {
1107       // In the case of G_LOAD, this was a non-extending load already and we're
1108       // about to lower to the same instruction.
1109       if (MI.getOpcode() == TargetOpcode::G_LOAD)
1110           return UnableToLegalize;
1111       MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
1112       MI.eraseFromParent();
1113       return Legalized;
1114     }
1115 
1116     if (DstTy.isScalar()) {
1117       unsigned TmpReg = MRI.createGenericVirtualRegister(
1118           LLT::scalar(MMO.getSize() /* in bytes */ * 8));
1119       MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1120       switch (MI.getOpcode()) {
1121       default:
1122         llvm_unreachable("Unexpected opcode");
1123       case TargetOpcode::G_LOAD:
1124         MIRBuilder.buildAnyExt(DstReg, TmpReg);
1125         break;
1126       case TargetOpcode::G_SEXTLOAD:
1127         MIRBuilder.buildSExt(DstReg, TmpReg);
1128         break;
1129       case TargetOpcode::G_ZEXTLOAD:
1130         MIRBuilder.buildZExt(DstReg, TmpReg);
1131         break;
1132       }
1133       MI.eraseFromParent();
1134       return Legalized;
1135     }
1136 
1137     return UnableToLegalize;
1138   }
1139   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1140   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1141   case TargetOpcode::G_CTLZ:
1142   case TargetOpcode::G_CTTZ:
1143   case TargetOpcode::G_CTPOP:
1144     return lowerBitCount(MI, TypeIdx, Ty);
1145   case G_UADDE: {
1146     unsigned Res = MI.getOperand(0).getReg();
1147     unsigned CarryOut = MI.getOperand(1).getReg();
1148     unsigned LHS = MI.getOperand(2).getReg();
1149     unsigned RHS = MI.getOperand(3).getReg();
1150     unsigned CarryIn = MI.getOperand(4).getReg();
1151 
1152     unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
1153     unsigned ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);
1154 
1155     MIRBuilder.buildAdd(TmpRes, LHS, RHS);
1156     MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
1157     MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
1158     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
1159 
1160     MI.eraseFromParent();
1161     return Legalized;
1162   }
1163   }
1164 }
1165 
1166 LegalizerHelper::LegalizeResult
fewerElementsVector(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)1167 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
1168                                      LLT NarrowTy) {
1169   // FIXME: Don't know how to handle secondary types yet.
1170   if (TypeIdx != 0)
1171     return UnableToLegalize;
1172 
1173   MIRBuilder.setInstr(MI);
1174   switch (MI.getOpcode()) {
1175   default:
1176     return UnableToLegalize;
1177   case TargetOpcode::G_IMPLICIT_DEF: {
1178     SmallVector<unsigned, 2> DstRegs;
1179 
1180     unsigned NarrowSize = NarrowTy.getSizeInBits();
1181     unsigned DstReg = MI.getOperand(0).getReg();
1182     unsigned Size = MRI.getType(DstReg).getSizeInBits();
1183     int NumParts = Size / NarrowSize;
1184     // FIXME: Don't know how to handle the situation where the small vectors
1185     // aren't all the same size yet.
1186     if (Size % NarrowSize != 0)
1187       return UnableToLegalize;
1188 
1189     for (int i = 0; i < NumParts; ++i) {
1190       unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1191       MIRBuilder.buildUndef(TmpReg);
1192       DstRegs.push_back(TmpReg);
1193     }
1194 
1195     if (NarrowTy.isVector())
1196       MIRBuilder.buildConcatVectors(DstReg, DstRegs);
1197     else
1198       MIRBuilder.buildBuildVector(DstReg, DstRegs);
1199 
1200     MI.eraseFromParent();
1201     return Legalized;
1202   }
1203   case TargetOpcode::G_ADD: {
1204     unsigned NarrowSize = NarrowTy.getSizeInBits();
1205     unsigned DstReg = MI.getOperand(0).getReg();
1206     unsigned Size = MRI.getType(DstReg).getSizeInBits();
1207     int NumParts = Size / NarrowSize;
1208     // FIXME: Don't know how to handle the situation where the small vectors
1209     // aren't all the same size yet.
1210     if (Size % NarrowSize != 0)
1211       return UnableToLegalize;
1212 
1213     SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
1214     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
1215     extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
1216 
1217     for (int i = 0; i < NumParts; ++i) {
1218       unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
1219       MIRBuilder.buildAdd(DstReg, Src1Regs[i], Src2Regs[i]);
1220       DstRegs.push_back(DstReg);
1221     }
1222 
1223     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
1224     MI.eraseFromParent();
1225     return Legalized;
1226   }
1227   case TargetOpcode::G_LOAD:
1228   case TargetOpcode::G_STORE: {
1229     bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
1230     unsigned ValReg = MI.getOperand(0).getReg();
1231     unsigned AddrReg = MI.getOperand(1).getReg();
1232     unsigned NarrowSize = NarrowTy.getSizeInBits();
1233     unsigned Size = MRI.getType(ValReg).getSizeInBits();
1234     unsigned NumParts = Size / NarrowSize;
1235 
1236     SmallVector<unsigned, 8> NarrowRegs;
1237     if (!IsLoad)
1238       extractParts(ValReg, NarrowTy, NumParts, NarrowRegs);
1239 
1240     const LLT OffsetTy =
1241         LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
1242     MachineFunction &MF = *MI.getMF();
1243     MachineMemOperand *MMO = *MI.memoperands_begin();
1244     for (unsigned Idx = 0; Idx < NumParts; ++Idx) {
1245       unsigned Adjustment = Idx * NarrowTy.getSizeInBits() / 8;
1246       unsigned Alignment = MinAlign(MMO->getAlignment(), Adjustment);
1247       unsigned NewAddrReg = 0;
1248       MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, Adjustment);
1249       MachineMemOperand &NewMMO = *MF.getMachineMemOperand(
1250           MMO->getPointerInfo().getWithOffset(Adjustment), MMO->getFlags(),
1251           NarrowTy.getSizeInBits() / 8, Alignment);
1252       if (IsLoad) {
1253         unsigned Dst = MRI.createGenericVirtualRegister(NarrowTy);
1254         NarrowRegs.push_back(Dst);
1255         MIRBuilder.buildLoad(Dst, NewAddrReg, NewMMO);
1256       } else {
1257         MIRBuilder.buildStore(NarrowRegs[Idx], NewAddrReg, NewMMO);
1258       }
1259     }
1260     if (IsLoad) {
1261       if (NarrowTy.isVector())
1262         MIRBuilder.buildConcatVectors(ValReg, NarrowRegs);
1263       else
1264         MIRBuilder.buildBuildVector(ValReg, NarrowRegs);
1265     }
1266     MI.eraseFromParent();
1267     return Legalized;
1268   }
1269   }
1270 }
1271 
1272 LegalizerHelper::LegalizeResult
lowerBitCount(MachineInstr & MI,unsigned TypeIdx,LLT Ty)1273 LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
1274   unsigned Opc = MI.getOpcode();
1275   auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
1276   auto isSupported = [this](const LegalityQuery &Q) {
1277     auto QAction = LI.getAction(Q).Action;
1278     return QAction == Legal || QAction == Libcall || QAction == Custom;
1279   };
1280   switch (Opc) {
1281   default:
1282     return UnableToLegalize;
1283   case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1284     // This trivially expands to CTLZ.
1285     Observer.changingInstr(MI);
1286     MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
1287     Observer.changedInstr(MI);
1288     return Legalized;
1289   }
1290   case TargetOpcode::G_CTLZ: {
1291     unsigned SrcReg = MI.getOperand(1).getReg();
1292     unsigned Len = Ty.getSizeInBits();
1293     if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty}})) {
1294       // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
1295       auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
1296                                              {Ty}, {SrcReg});
1297       auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
1298       auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
1299       auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
1300                                           SrcReg, MIBZero);
1301       MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
1302                              MIBCtlzZU);
1303       MI.eraseFromParent();
1304       return Legalized;
1305     }
1306     // for now, we do this:
1307     // NewLen = NextPowerOf2(Len);
1308     // x = x | (x >> 1);
1309     // x = x | (x >> 2);
1310     // ...
1311     // x = x | (x >>16);
1312     // x = x | (x >>32); // for 64-bit input
1313     // Upto NewLen/2
1314     // return Len - popcount(x);
1315     //
1316     // Ref: "Hacker's Delight" by Henry Warren
1317     unsigned Op = SrcReg;
1318     unsigned NewLen = PowerOf2Ceil(Len);
1319     for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
1320       auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
1321       auto MIBOp = MIRBuilder.buildInstr(
1322           TargetOpcode::G_OR, {Ty},
1323           {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty},
1324                                      {Op, MIBShiftAmt})});
1325       Op = MIBOp->getOperand(0).getReg();
1326     }
1327     auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op});
1328     MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
1329                           {MIRBuilder.buildConstant(Ty, Len), MIBPop});
1330     MI.eraseFromParent();
1331     return Legalized;
1332   }
1333   case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
1334     // This trivially expands to CTTZ.
1335     Observer.changingInstr(MI);
1336     MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
1337     Observer.changedInstr(MI);
1338     return Legalized;
1339   }
1340   case TargetOpcode::G_CTTZ: {
1341     unsigned SrcReg = MI.getOperand(1).getReg();
1342     unsigned Len = Ty.getSizeInBits();
1343     if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty}})) {
1344       // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
1345       // zero.
1346       auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
1347                                              {Ty}, {SrcReg});
1348       auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
1349       auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
1350       auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
1351                                           SrcReg, MIBZero);
1352       MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
1353                              MIBCttzZU);
1354       MI.eraseFromParent();
1355       return Legalized;
1356     }
1357     // for now, we use: { return popcount(~x & (x - 1)); }
1358     // unless the target has ctlz but not ctpop, in which case we use:
1359     // { return 32 - nlz(~x & (x-1)); }
1360     // Ref: "Hacker's Delight" by Henry Warren
1361     auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
1362     auto MIBNot =
1363         MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1});
1364     auto MIBTmp = MIRBuilder.buildInstr(
1365         TargetOpcode::G_AND, {Ty},
1366         {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
1367                                        {SrcReg, MIBCstNeg1})});
1368     if (!isSupported({TargetOpcode::G_CTPOP, {Ty}}) &&
1369         isSupported({TargetOpcode::G_CTLZ, {Ty}})) {
1370       auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
1371       MIRBuilder.buildInstr(
1372           TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
1373           {MIBCstLen,
1374            MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})});
1375       MI.eraseFromParent();
1376       return Legalized;
1377     }
1378     MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
1379     MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
1380     return Legalized;
1381   }
1382   }
1383 }
1384