//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file implements the LegalizerHelper class to legalize
/// individual instructions and the LegalizeMachineIR wrapper pass for the
/// primary legalization.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "legalizer"

using namespace llvm;
using namespace LegalizeActions;

LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &Builder)
    : MIRBuilder(Builder), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
  MIRBuilder.setMF(MF);
  MIRBuilder.setChangeObserver(Observer);
}

LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &B)
    : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
  MIRBuilder.setMF(MF);
  MIRBuilder.setChangeObserver(Observer);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
  LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));

  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case Legal:
    LLVM_DEBUG(dbgs() << ".. Already legal\n");
    return AlreadyLegal;
  case Libcall:
    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
    return libcall(MI);
  case NarrowScalar:
    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
    return narrowScalar(MI, Step.TypeIdx, Step.NewType);
  case WidenScalar:
    LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
    return widenScalar(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    LLVM_DEBUG(dbgs() << ".. Lower\n");
    return lower(MI, Step.TypeIdx, Step.NewType);
  case FewerElements:
    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
  case Custom:
    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
    return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
                                                            : UnableToLegalize;
  default:
    LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
    return UnableToLegalize;
  }
}

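// Split Reg into NumParts pieces of type Ty using G_UNMERGE_VALUES; e.g. an
// s64 register split with Ty = s32 and NumParts = 2 yields its low and high
// 32-bit halves.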
void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts,
                                   SmallVectorImpl<unsigned> &VRegs) {
  for (int i = 0; i < NumParts; ++i)
    VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
  MIRBuilder.buildUnmerge(VRegs, Reg);
}

static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
  switch (Opcode) {
  case TargetOpcode::G_SDIV:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32;
  case TargetOpcode::G_UDIV:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32;
  case TargetOpcode::G_SREM:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
  case TargetOpcode::G_UREM:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    assert(Size == 32 && "Unsupported size");
    return RTLIB::CTLZ_I32;
  case TargetOpcode::G_FADD:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
  case TargetOpcode::G_FSUB:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
  case TargetOpcode::G_FMUL:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
  case TargetOpcode::G_FDIV:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
  case TargetOpcode::G_FREM:
    return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
  case TargetOpcode::G_FPOW:
    return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
  case TargetOpcode::G_FMA:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
  }
  llvm_unreachable("Unknown libcall function");
}

LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args) {
  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  const char *Name = TLI.getLibcallName(Libcall);

  MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
  if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall),
                     MachineOperand::CreateES(Name), Result, Args))
    return LegalizerHelper::UnableToLegalize;

  return LegalizerHelper::Legalized;
}

// Useful for libcalls where all operands have the same type.
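// For example, a 64-bit G_FREM becomes a call to the RTLIB::REM_F64 routine
// (typically fmod), with both inputs and the result passed as doubles.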
static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
              Type *OpType) {
  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);

  SmallVector<CallLowering::ArgInfo, 3> Args;
  for (unsigned i = 1; i < MI.getNumOperands(); i++)
    Args.push_back({MI.getOperand(i).getReg(), OpType});
  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
                       Args);
}

static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
                                       Type *FromType) {
  auto ToMVT = MVT::getVT(ToType);
  auto FromMVT = MVT::getVT(FromType);

  switch (Opcode) {
  case TargetOpcode::G_FPEXT:
    return RTLIB::getFPEXT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTRUNC:
    return RTLIB::getFPROUND(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOSI:
    return RTLIB::getFPTOSINT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOUI:
    return RTLIB::getFPTOUINT(FromMVT, ToMVT);
  case TargetOpcode::G_SITOFP:
    return RTLIB::getSINTTOFP(FromMVT, ToMVT);
  case TargetOpcode::G_UITOFP:
    return RTLIB::getUINTTOFP(FromMVT, ToMVT);
  }
  llvm_unreachable("Unsupported libcall function");
}

static LegalizerHelper::LegalizeResult
conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
                  Type *FromType) {
  RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
                       {{MI.getOperand(1).getReg(), FromType}});
}

LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI) {
  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = LLTy.getSizeInBits();
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    Type *HLTy = IntegerType::get(Ctx, Size);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM: {
    Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPEXT: {
    // FIXME: Support other floating point types (half, fp128 etc)
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 64 || FromSize != 32)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTRUNC: {
    // FIXME: Support other floating point types (half, fp128 etc)
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 32 || FromSize != 64)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 32 || (FromSize != 32 && FromSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getInt32Ty(Ctx),
        FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (FromSize != 32 || (ToSize != 32 && ToSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
        Type::getInt32Ty(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
                                                              unsigned TypeIdx,
                                                              LLT NarrowTy) {
  MIRBuilder.setInstr(MI);

  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_IMPLICIT_DEF: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i)
      DstRegs.push_back(
          MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());

    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ADD: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    // Expand in terms of carry-setting/consuming G_UADDE instructions.
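    // For example, an s64 add narrowed to s32 becomes (roughly):
    //   %lo, %c1 = G_UADDE %a_lo, %b_lo, %zero
    //   %hi, %c2 = G_UADDE %a_hi, %b_hi, %c1
    //   %dst = G_MERGE_VALUES %lo, %hi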
    int NumParts = SizeOp0 / NarrowTy.getSizeInBits();

    SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);

    unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
    MIRBuilder.buildConstant(CarryIn, 0);

    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));

      MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
                            Src2Regs[i], CarryIn);

      DstRegs.push_back(DstReg);
      CarryIn = CarryOut;
    }
    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SUB: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

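    // Expand in terms of borrow-propagating subtracts; e.g. an s64 subtract
    // narrowed to s32 becomes (roughly):
    //   %lo, %b1 = G_USUBO %a_lo, %b_lo
    //   %hi, %b2 = G_USUBE %a_hi, %b_hi, %b1
    //   %dst = G_MERGE_VALUES %lo, %hi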
    int NumParts = SizeOp0 / NarrowTy.getSizeInBits();

    SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);

    unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
    unsigned BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
    MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut},
                          {Src1Regs[0], Src2Regs[0]});
    DstRegs.push_back(DstReg);
    unsigned BorrowIn = BorrowOut;
    for (int i = 1; i < NumParts; ++i) {
      DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));

      MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut},
                            {Src1Regs[i], Src2Regs[i], BorrowIn});

      DstRegs.push_back(DstReg);
      BorrowIn = BorrowOut;
    }
    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_MUL:
    return narrowScalarMul(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_EXTRACT: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    // FIXME: add support for when SizeOp1 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp1 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp1 / NarrowSize;

    SmallVector<unsigned, 2> SrcRegs, DstRegs;
    SmallVector<uint64_t, 2> Indexes;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    unsigned OpReg = MI.getOperand(0).getReg();
    uint64_t OpStart = MI.getOperand(2).getImm();
    uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
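    // For example, extracting an s48 at bit offset 40 from an s128 source
    // narrowed to s32 parts only involves source parts 1 and 2; the other
    // parts are skipped below.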
    for (int i = 0; i < NumParts; ++i) {
      unsigned SrcStart = i * NarrowSize;

      if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
        // No part of the extract uses this subregister, ignore it.
        continue;
      } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
        // The entire subregister is extracted, forward the value.
        DstRegs.push_back(SrcRegs[i]);
        continue;
      }

      // Determine where within this source part the extracted bits start
      // (ExtractOffset) and how many bits this part contributes (SegSize).
      int64_t ExtractOffset;
      uint64_t SegSize;
      if (OpStart < SrcStart) {
        ExtractOffset = 0;
        SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
      } else {
        ExtractOffset = OpStart - SrcStart;
        SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
      }

      unsigned SegReg = SrcRegs[i];
      if (ExtractOffset != 0 || SegSize != NarrowSize) {
        // A genuine extract is needed.
        SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
        MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
      }

      DstRegs.push_back(SegReg);
    }

    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_INSERT: {
    // FIXME: Don't know how to handle secondary types yet.
    if (TypeIdx != 0)
      return UnableToLegalize;

    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;

    SmallVector<unsigned, 2> SrcRegs, DstRegs;
    SmallVector<uint64_t, 2> Indexes;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    unsigned OpReg = MI.getOperand(2).getReg();
    uint64_t OpStart = MI.getOperand(3).getImm();
    uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstStart = i * NarrowSize;

      if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
        // No part of the insert affects this subregister, forward the original.
        DstRegs.push_back(SrcRegs[i]);
        continue;
      } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
        // The entire subregister is defined by this insert, forward the new
        // value.
        DstRegs.push_back(OpReg);
        continue;
      }

      // Work out which slice of OpReg lands in this destination part:
      // ExtractOffset is its offset within OpReg, InsertOffset its position
      // within the part, and SegSize its width in bits.
      int64_t ExtractOffset, InsertOffset;
      uint64_t SegSize;
      if (OpStart < DstStart) {
        InsertOffset = 0;
        ExtractOffset = DstStart - OpStart;
        SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
      } else {
        InsertOffset = OpStart - DstStart;
        ExtractOffset = 0;
        SegSize =
            std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
      }

      unsigned SegReg = OpReg;
      if (ExtractOffset != 0 || SegSize != OpSize) {
        // A genuine extract is needed.
        SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
        MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
      }

      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
      DstRegs.push_back(DstReg);
    }

    assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    const auto &MMO = **MI.memoperands_begin();
    // This implementation doesn't work for atomics. Give up instead of doing
    // something invalid.
    if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
        MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;
    LLT OffsetTy = LLT::scalar(
        MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());

    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned SrcReg = 0;
      unsigned Adjustment = i * NarrowSize / 8;
      unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);

      MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
          MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
          NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
          MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());

      MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy,
                                Adjustment);

      MIRBuilder.buildLoad(DstReg, SrcReg, *SplitMMO);

      DstRegs.push_back(DstReg);
    }
    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD: {
    bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
    unsigned DstReg = MI.getOperand(0).getReg();
    unsigned PtrReg = MI.getOperand(1).getReg();

    unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
    auto &MMO = **MI.memoperands_begin();
    if (MMO.getSize() * 8 == NarrowSize) {
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
    } else {
      unsigned ExtLoad = ZExt ? TargetOpcode::G_ZEXTLOAD
        : TargetOpcode::G_SEXTLOAD;
      MIRBuilder.buildInstr(ExtLoad)
        .addDef(TmpReg)
        .addUse(PtrReg)
        .addMemOperand(&MMO);
    }

    if (ZExt)
      MIRBuilder.buildZExt(DstReg, TmpReg);
    else
      MIRBuilder.buildSExt(DstReg, TmpReg);

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    const auto &MMO = **MI.memoperands_begin();
    // This implementation doesn't work for atomics. Give up instead of doing
    // something invalid.
    if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
        MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;
    LLT OffsetTy = LLT::scalar(
        MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());

    SmallVector<unsigned, 2> SrcRegs;
    extractParts(MI.getOperand(0).getReg(), NarrowTy, NumParts, SrcRegs);

    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = 0;
      unsigned Adjustment = i * NarrowSize / 8;
      unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);

      MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
          MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
          NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
          MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());

      MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy,
                                Adjustment);

      MIRBuilder.buildStore(SrcRegs[i], DstReg, *SplitMMO);
    }
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;
    const APInt &Cst = MI.getOperand(1).getCImm()->getValue();
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();

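    // Emit one G_CONSTANT per part holding the corresponding slice of the
    // value; e.g. narrowing an s64 constant to s32 produces its low and high
    // 32-bit halves, which are then merged back together.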
    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      ConstantInt *CI =
          ConstantInt::get(Ctx, Cst.lshr(NarrowSize * i).trunc(NarrowSize));
      MIRBuilder.buildConstant(DstReg, *CI);
      DstRegs.push_back(DstReg);
    }
    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // Legalize bitwise operation:
    // A = BinOp<Ty> B, C
    // into:
    // B1, ..., BN = G_UNMERGE_VALUES B
    // C1, ..., CN = G_UNMERGE_VALUES C
    // A1 = BinOp<Ty/N> B1, C1
    // ...
    // AN = BinOp<Ty/N> BN, CN
    // A = G_MERGE_VALUES A1, ..., AN

    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    // List the registers where the destination will be scattered.
    SmallVector<unsigned, 2> DstRegs;
    // List the registers where the first argument will be split.
    SmallVector<unsigned, 2> SrcsReg1;
    // List the registers where the second argument will be split.
    SmallVector<unsigned, 2> SrcsReg2;
    // Create all the temporary registers.
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy);

      DstRegs.push_back(DstReg);
      SrcsReg1.push_back(SrcReg1);
      SrcsReg2.push_back(SrcReg2);
    }
    // Explode the big arguments into smaller chunks.
    MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg());
    MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg());

    // Do the operation on each small part.
    for (int i = 0; i < NumParts; ++i)
      MIRBuilder.buildInstr(MI.getOpcode(), {DstRegs[i]},
                            {SrcsReg1[i], SrcsReg2[i]});

    // Gather the destination registers into the final destination.
    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    if (TypeIdx != 1)
      return UnableToLegalize; // TODO
    narrowScalarSrc(MI, NarrowTy, 2);
    return Legalized;
  }
  }
}

void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()});
  MO.setReg(ExtB->getOperand(0).getReg());
}

void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {NarrowTy},
                                    {MO.getReg()});
  MO.setReg(ExtB->getOperand(0).getReg());
}

void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned TruncOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt});
  MO.setReg(DstExt);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO: {
    if (TypeIdx == 1)
      return UnableToLegalize; // TODO
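    // Zero-extend both operands, do the arithmetic in WideTy, and derive the
    // overflow flag by comparing the wide result against its value masked to
    // the original width; e.g. an s8 G_UADDO widened to s32 overflows iff
    // (zext(a) + zext(b)) != (zext(a) + zext(b)) & 0xff.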
    auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
                                         {MI.getOperand(2).getReg()});
    auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
                                         {MI.getOperand(3).getReg()});
    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
                          ? TargetOpcode::G_ADD
                          : TargetOpcode::G_SUB;
    // Do the arithmetic in the larger type.
    auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
    LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
    APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits());
    auto AndOp = MIRBuilder.buildInstr(
        TargetOpcode::G_AND, {WideTy},
        {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
    // There is no overflow if the AndOp is the same as NewOp.
    MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp,
                         AndOp);
    // Now trunc the NewOp to the original result.
    MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP: {
    // First ZEXT the input.
    auto MIBSrc = MIRBuilder.buildZExt(WideTy, MI.getOperand(1).getReg());
    LLT CurTy = MRI.getType(MI.getOperand(0).getReg());
    if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
      // The count is the same in the larger type except if the original
      // value was zero.  This can be handled by setting the bit just off
      // the top of the original type.
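      // For example, when widening an s8 G_CTTZ to s32, OR in bit 8 so that a
      // zero input yields a count of 8 rather than 32.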
      auto TopBit =
          APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
      MIBSrc = MIRBuilder.buildInstr(
          TargetOpcode::G_OR, {WideTy},
          {MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit.getSExtValue())});
    }
    // Perform the operation at the larger size.
    auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
    // This is already the correct result for CTPOP and CTTZs
    if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
        MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
      // The correct result is NewOp - (the size difference between WideTy and
      // CurTy).
      unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
      MIBNewOp = MIRBuilder.buildInstr(
          TargetOpcode::G_SUB, {WideTy},
          {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
    }
    auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
    // Make the original instruction a trunc now, and update its source.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_TRUNC));
    MI.getOperand(1).setReg(MIBNewOp->getOperand(0).getReg());
    Observer.changedInstr(MI);
    return Legalized;
  }

  case TargetOpcode::G_ADD:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SUB:
    // Perform operation at larger width (any extension is fine here, high bits
    // don't affect the result) and then truncate the result back to the
    // original type.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SHL:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_SREM:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
        TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;

      widenScalarSrc(MI, WideTy, 1, CvtOp);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SELECT:
    Observer.changingInstr(MI);
    if (TypeIdx == 0) {
      // Perform operation at larger width (any extension is fine here, high
      // bits don't affect the result) and then truncate the result back to the
      // original type.
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      // Explicit extension is required here since high bits affect the result.
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SITOFP:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_UITOFP:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_INSERT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_LOAD:
    // For some types like i24, we might try to widen to i32. To properly handle
    // this we should be using a dedicated extending load, until then avoid
    // trying to legalize.
    if (alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) !=
        WideTy.getSizeInBits())
      return UnableToLegalize;
    LLVM_FALLTHROUGH;
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_STORE: {
    if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(1) ||
        WideTy != LLT::scalar(8))
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
    Observer.changingInstr(MI);
    SrcMO.setCImm(ConstantInt::get(Ctx, Val));

    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FCONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    APFloat Val = SrcMO.getFPImm()->getValueAPF();
    bool LosesInfo;
    switch (WideTy.getSizeInBits()) {
    case 32:
      Val.convert(APFloat::IEEEsingle(), APFloat::rmTowardZero, &LosesInfo);
      break;
    case 64:
      Val.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &LosesInfo);
      break;
    default:
      llvm_unreachable("Unhandled fp widen type");
    }
    Observer.changingInstr(MI);
    SrcMO.setFPImm(ConstantFP::get(Ctx, Val));

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_IMPLICIT_DEF: {
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BRCOND:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FCMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ICMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
                               MI.getOperand(1).getPredicate()))
                               ? TargetOpcode::G_SEXT
                               : TargetOpcode::G_ZEXT;
      widenScalarSrc(MI, WideTy, 2, ExtOpcode);
      widenScalarSrc(MI, WideTy, 3, ExtOpcode);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_GEP:
    assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_PHI: {
    assert(TypeIdx == 0 && "Expecting only Idx 0");

    Observer.changingInstr(MI);
    for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
    }

    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
    if (TypeIdx == 0) {
      unsigned VecReg = MI.getOperand(1).getReg();
      LLT VecTy = MRI.getType(VecReg);
      Observer.changingInstr(MI);

      widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
                                     WideTy.getSizeInBits()),
                     1, TargetOpcode::G_SEXT);

      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    if (TypeIdx != 2)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCEIL:
    assert(TypeIdx == 0);
    Observer.changingInstr(MI);

    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  using namespace TargetOpcode;
  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
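    // Lower x % y into x - (x / y) * y, using the signed or unsigned division
    // opcode as appropriate.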
    unsigned QuotReg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
        .addDef(QuotReg)
        .addUse(MI.getOperand(1).getReg())
        .addUse(MI.getOperand(2).getReg());

    unsigned ProdReg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
    MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
                        ProdReg);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
    // result.
    unsigned Res = MI.getOperand(0).getReg();
    unsigned Overflow = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildMul(Res, LHS, RHS);

    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
                          ? TargetOpcode::G_SMULH
                          : TargetOpcode::G_UMULH;

    unsigned HiPart = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(Opcode)
      .addDef(HiPart)
      .addUse(LHS)
      .addUse(RHS);

    unsigned Zero = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildConstant(Zero, 0);

    // For *signed* multiply, overflow is detected by checking:
    // (hi != (lo >> bitwidth-1))
    if (Opcode == TargetOpcode::G_SMULH) {
      unsigned Shifted = MRI.createGenericVirtualRegister(Ty);
      unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty);
      MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
      MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
        .addDef(Shifted)
        .addUse(Res)
        .addUse(ShiftAmt);
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
    } else {
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
    }
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FNEG: {
    // TODO: Handle vector types once we are able to
    // represent them.
    if (Ty.isVector())
      return UnableToLegalize;
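    // Lower G_FNEG x into G_FSUB -0.0, x.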
    unsigned Res = MI.getOperand(0).getReg();
    Type *ZeroTy;
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    switch (Ty.getSizeInBits()) {
    case 16:
      ZeroTy = Type::getHalfTy(Ctx);
      break;
    case 32:
      ZeroTy = Type::getFloatTy(Ctx);
      break;
    case 64:
      ZeroTy = Type::getDoubleTy(Ctx);
      break;
    case 128:
      ZeroTy = Type::getFP128Ty(Ctx);
      break;
    default:
      llvm_unreachable("unexpected floating-point type");
    }
    ConstantFP &ZeroForNegation =
        *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
    auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
    MIRBuilder.buildInstr(TargetOpcode::G_FSUB)
        .addDef(Res)
        .addUse(Zero->getOperand(0).getReg())
        .addUse(MI.getOperand(1).getReg());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FSUB: {
    // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
    // First, check if G_FNEG is marked as Lower. If so, we may
    // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
    if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
      return UnableToLegalize;
    unsigned Res = MI.getOperand(0).getReg();
    unsigned LHS = MI.getOperand(1).getReg();
    unsigned RHS = MI.getOperand(2).getReg();
    unsigned Neg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
    MIRBuilder.buildInstr(TargetOpcode::G_FADD)
        .addDef(Res)
        .addUse(LHS)
        .addUse(Neg);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    unsigned OldValRes = MI.getOperand(0).getReg();
    unsigned SuccessRes = MI.getOperand(1).getReg();
    unsigned Addr = MI.getOperand(2).getReg();
    unsigned CmpVal = MI.getOperand(3).getReg();
    unsigned NewVal = MI.getOperand(4).getReg();
    MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
                                  **MI.memoperands_begin());
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD: {
    // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
    unsigned DstReg = MI.getOperand(0).getReg();
    unsigned PtrReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    auto &MMO = **MI.memoperands_begin();

    if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) {
      // In the case of G_LOAD, this was a non-extending load already and we're
      // about to lower to the same instruction.
      if (MI.getOpcode() == TargetOpcode::G_LOAD)
        return UnableToLegalize;
      MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
      MI.eraseFromParent();
      return Legalized;
    }

    if (DstTy.isScalar()) {
      unsigned TmpReg = MRI.createGenericVirtualRegister(
          LLT::scalar(MMO.getSize() /* in bytes */ * 8));
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
      switch (MI.getOpcode()) {
      default:
        llvm_unreachable("Unexpected opcode");
      case TargetOpcode::G_LOAD:
        MIRBuilder.buildAnyExt(DstReg, TmpReg);
        break;
      case TargetOpcode::G_SEXTLOAD:
        MIRBuilder.buildSExt(DstReg, TmpReg);
        break;
      case TargetOpcode::G_ZEXTLOAD:
        MIRBuilder.buildZExt(DstReg, TmpReg);
        break;
      }
      MI.eraseFromParent();
      return Legalized;
    }

    return UnableToLegalize;
  }
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTPOP:
    return lowerBitCount(MI, TypeIdx, Ty);
  case G_UADDE: {
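    // Lower to a wrapping add of the operands plus the zero-extended carry-in;
    // the carry-out is set when the final sum wraps below LHS.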
    unsigned Res = MI.getOperand(0).getReg();
    unsigned CarryOut = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();
    unsigned CarryIn = MI.getOperand(4).getReg();

    unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
    unsigned ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);

    MIRBuilder.buildAdd(TmpRes, LHS, RHS);
    MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
    MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBO: {
    unsigned Res = MI.getOperand(0).getReg();
    unsigned BorrowOut = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildSub(Res, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBE: {
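    // Res = LHS - RHS - BorrowIn; the borrow-out is LHS < RHS, except when
    // LHS == RHS, in which case it is simply the incoming borrow.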
    unsigned Res = MI.getOperand(0).getReg();
    unsigned BorrowOut = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();
    unsigned BorrowIn = MI.getOperand(4).getReg();

    unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
    unsigned ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty);
    unsigned LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
    unsigned LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));

    MIRBuilder.buildSub(TmpRes, LHS, RHS);
    MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn);
    MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS);
    MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  }
}

LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
    MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
  SmallVector<unsigned, 2> DstRegs;

  unsigned NarrowSize = NarrowTy.getSizeInBits();
  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned Size = MRI.getType(DstReg).getSizeInBits();
  int NumParts = Size / NarrowSize;
  // FIXME: Don't know how to handle the situation where the small vectors
  // aren't all the same size yet.
  if (Size % NarrowSize != 0)
    return UnableToLegalize;

  for (int i = 0; i < NumParts; ++i) {
    unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildUndef(TmpReg);
    DstRegs.push_back(TmpReg);
  }

  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

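// Break a vector operation into NumParts operations on NarrowTy pieces of the
// operands and recombine the partial results, e.g. a <4 x s32> G_ADD split
// with NarrowTy = <2 x s32> becomes two <2 x s32> adds whose results are
// concatenated back into the original destination.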
1342 LegalizerHelper::LegalizeResult
1343 LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
1344                                           LLT NarrowTy) {
1345   unsigned Opc = MI.getOpcode();
1346   unsigned NarrowSize = NarrowTy.getSizeInBits();
1347   unsigned DstReg = MI.getOperand(0).getReg();
1348   unsigned Flags = MI.getFlags();
1349   unsigned Size = MRI.getType(DstReg).getSizeInBits();
1350   int NumParts = Size / NarrowSize;
1351   // FIXME: Don't know how to handle the situation where the small vectors
1352   // aren't all the same size yet.
1353   if (Size % NarrowSize != 0)
1354     return UnableToLegalize;
1355 
1356   unsigned NumOps = MI.getNumOperands() - 1;
1357   SmallVector<unsigned, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
1358 
1359   extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs);
1360 
1361   if (NumOps >= 2)
1362     extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs);
1363 
1364   if (NumOps >= 3)
1365     extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs);
1366 
1367   for (int i = 0; i < NumParts; ++i) {
1368     unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
1369 
1370     if (NumOps == 1)
1371       MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags);
1372     else if (NumOps == 2) {
1373       MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags);
1374     } else if (NumOps == 3) {
1375       MIRBuilder.buildInstr(Opc, {DstReg},
1376                             {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags);
1377     }
1378 
1379     DstRegs.push_back(DstReg);
1380   }
1381 
1382   if (NarrowTy.isVector())
1383     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
1384   else
1385     MIRBuilder.buildBuildVector(DstReg, DstRegs);
1386 
1387   MI.eraseFromParent();
1388   return Legalized;
1389 }
1390 
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);

  LLT NarrowTy0 = NarrowTy;
  LLT NarrowTy1;
  unsigned NumParts;

  if (NarrowTy.isScalar()) {
    NumParts = DstTy.getNumElements();
    NarrowTy1 = SrcTy.getElementType();
  } else {
    // Uneven breakdown not handled.
    NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
    if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
      return UnableToLegalize;

    // Each source piece has the same element count as a result piece.
    NarrowTy1 = LLT::vector(NarrowTy.getNumElements(),
                            SrcTy.getElementType().getSizeInBits());
  }

  SmallVector<unsigned, 4> SrcRegs, DstRegs;
  extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);

  for (unsigned I = 0; I < NumParts; ++I) {
    unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode())
      .addDef(DstReg)
      .addUse(SrcRegs[I]);

    NewInst->setFlags(MI.getFlags());
    DstRegs.push_back(DstReg);
  }

  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

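// Break a vector G_ICMP or G_FCMP into compares on smaller pieces and rebuild
// the result vector from the partial results.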
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned Src0Reg = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(Src0Reg);

  unsigned NumParts;
  LLT NarrowTy0, NarrowTy1;

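  // TypeIdx 0 requests narrowing the result type, TypeIdx 1 the compared
  // operand type. Compute the per-piece result type (NarrowTy0), operand type
  // (NarrowTy1) and the number of pieces for whichever index was requested.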
  if (TypeIdx == 0) {
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = DstTy.getNumElements();

    NarrowTy0 = NarrowTy;
    NumParts =
        NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
    NarrowTy1 = NarrowTy.isVector() ?
      LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
      SrcTy.getElementType();

  } else {
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = SrcTy.getNumElements();

    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : OldElts;
    NarrowTy0 = NarrowTy.isVector()
                    ? LLT::vector(NarrowTy.getNumElements(),
                                  DstTy.getScalarSizeInBits())
                    : DstTy.getElementType();
    NarrowTy1 = NarrowTy;
  }

  // FIXME: We don't yet handle the case where the narrowed pieces aren't all
  // the same size.
  if (NarrowTy1.isVector() &&
      NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
    return UnableToLegalize;

  CmpInst::Predicate Pred
    = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());

  SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);

  for (unsigned I = 0; I < NumParts; ++I) {
    unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    DstRegs.push_back(DstReg);

    if (MI.getOpcode() == TargetOpcode::G_ICMP) {
      MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
    } else {
      MachineInstr *NewCmp
        = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
      NewCmp->setFlags(MI.getFlags());
    }
  }

  if (NarrowTy1.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

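// Break a non-atomic vector G_LOAD or G_STORE into NarrowTy-sized memory
// accesses at increasing offsets from the original address.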
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorLoadStore(MachineInstr &MI,
                                              unsigned TypeIdx, LLT NarrowTy) {
  // FIXME: We only know how to handle the value type (TypeIdx 0) so far.
  if (TypeIdx != 0)
    return UnableToLegalize;

  MachineMemOperand *MMO = *MI.memoperands_begin();

  // This implementation doesn't work for atomics. Give up instead of doing
  // something invalid.
  if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
      MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
    return UnableToLegalize;

  bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
  unsigned ValReg = MI.getOperand(0).getReg();
  unsigned AddrReg = MI.getOperand(1).getReg();
  unsigned NarrowSize = NarrowTy.getSizeInBits();
  unsigned Size = MRI.getType(ValReg).getSizeInBits();
  unsigned NumParts = Size / NarrowSize;

  // FIXME: We don't yet handle the case where the narrowed pieces aren't all
  // the same size.
  if (Size % NarrowSize != 0)
    return UnableToLegalize;

  SmallVector<unsigned, 8> NarrowRegs;
  if (!IsLoad)
    extractParts(ValReg, NarrowTy, NumParts, NarrowRegs);

  const LLT OffsetTy =
    LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
  MachineFunction &MF = *MI.getMF();

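  // Emit one narrow memory access per piece, advancing the address for each
  // and carving an offset MachineMemOperand out of the original one.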
  for (unsigned Idx = 0; Idx < NumParts; ++Idx) {
    unsigned Adjustment = Idx * NarrowTy.getSizeInBits() / 8;
    unsigned Alignment = MinAlign(MMO->getAlignment(), Adjustment);
    unsigned NewAddrReg = 0;
    MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, Adjustment);
    MachineMemOperand &NewMMO = *MF.getMachineMemOperand(
      MMO->getPointerInfo().getWithOffset(Adjustment), MMO->getFlags(),
      NarrowTy.getSizeInBits() / 8, Alignment);
    if (IsLoad) {
      unsigned Dst = MRI.createGenericVirtualRegister(NarrowTy);
      NarrowRegs.push_back(Dst);
      MIRBuilder.buildLoad(Dst, NewAddrReg, NewMMO);
    } else {
      MIRBuilder.buildStore(NarrowRegs[Idx], NewAddrReg, NewMMO);
    }
  }
  if (IsLoad) {
    if (NarrowTy.isVector())
      MIRBuilder.buildConcatVectors(ValReg, NarrowRegs);
    else
      MIRBuilder.buildBuildVector(ValReg, NarrowRegs);
  }
  MI.eraseFromParent();
  return Legalized;
}

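// Legalize an operation by reducing the number of vector elements, using the
// opcode-specific handlers above.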
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  using namespace TargetOpcode;

  MIRBuilder.setInstr(MI);
  switch (MI.getOpcode()) {
  case G_IMPLICIT_DEF:
    return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
  case G_AND:
  case G_OR:
  case G_XOR:
  case G_ADD:
  case G_SUB:
  case G_MUL:
  case G_SMULH:
  case G_UMULH:
  case G_FADD:
  case G_FMUL:
  case G_FSUB:
  case G_FNEG:
  case G_FABS:
  case G_FDIV:
  case G_FREM:
  case G_FMA:
  case G_FPOW:
  case G_FEXP:
  case G_FEXP2:
  case G_FLOG:
  case G_FLOG2:
  case G_FLOG10:
  case G_FCEIL:
  case G_INTRINSIC_ROUND:
  case G_INTRINSIC_TRUNC:
    return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
  case G_ZEXT:
  case G_SEXT:
  case G_ANYEXT:
  case G_FPEXT:
  case G_FPTRUNC:
  case G_SITOFP:
  case G_UITOFP:
  case G_FPTOSI:
  case G_FPTOUI:
    return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
  case G_ICMP:
  case G_FCMP:
    return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
  case G_LOAD:
  case G_STORE:
    return fewerElementsVectorLoadStore(MI, TypeIdx, NarrowTy);
  default:
    return UnableToLegalize;
  }
}

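// Narrow a too-wide scalar G_MUL by splitting both operands into halves of
// HalfTy and recombining partial products of the halves into the low and high
// halves of the original result.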
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarMul(MachineInstr &MI, unsigned TypeIdx,
                                 LLT NewTy) {
  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned Src0 = MI.getOperand(1).getReg();
  unsigned Src1 = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(DstReg);
  if (Ty.isVector())
    return UnableToLegalize;

  unsigned Size = Ty.getSizeInBits();
  unsigned NewSize = Size / 2;
  if (Size != 2 * NewSize)
    return UnableToLegalize;

  LLT HalfTy = LLT::scalar(NewSize);
  // TODO: if HalfTy != NewTy, handle the breakdown all at once?

  unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty);
  unsigned Lo = MRI.createGenericVirtualRegister(HalfTy);
  unsigned Hi = MRI.createGenericVirtualRegister(HalfTy);
  unsigned ExtLo = MRI.createGenericVirtualRegister(Ty);
  unsigned ExtHi = MRI.createGenericVirtualRegister(Ty);
  unsigned ShiftedHi = MRI.createGenericVirtualRegister(Ty);

  SmallVector<unsigned, 2> Src0Parts;
  SmallVector<unsigned, 2> Src1Parts;

  extractParts(Src0, HalfTy, 2, Src0Parts);
  extractParts(Src1, HalfTy, 2, Src1Parts);

  // The low half of the result is just the product of the low halves.
  MIRBuilder.buildMul(Lo, Src0Parts[0], Src1Parts[0]);

  // The high half of the result is the carry out of the low product plus the
  // two cross products, each truncated to NewSize bits:
  //   Hi = umulh(Src0Lo, Src1Lo) + Src0Lo * Src1Hi + Src0Hi * Src1Lo
  unsigned Carry = MRI.createGenericVirtualRegister(HalfTy);
  unsigned Cross0 = MRI.createGenericVirtualRegister(HalfTy);
  unsigned Cross1 = MRI.createGenericVirtualRegister(HalfTy);
  unsigned CrossSum = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUMulH(Carry, Src0Parts[0], Src1Parts[0]);
  MIRBuilder.buildMul(Cross0, Src0Parts[0], Src1Parts[1]);
  MIRBuilder.buildMul(Cross1, Src0Parts[1], Src1Parts[0]);
  MIRBuilder.buildAdd(CrossSum, Cross0, Cross1);
  MIRBuilder.buildAdd(Hi, CrossSum, Carry);

  MIRBuilder.buildConstant(ShiftAmt, NewSize);
  MIRBuilder.buildAnyExt(ExtHi, Hi);
  MIRBuilder.buildShl(ShiftedHi, ExtHi, ShiftAmt);

  MIRBuilder.buildZExt(ExtLo, Lo);
  MIRBuilder.buildOr(DstReg, ExtLo, ShiftedHi);
  MI.eraseFromParent();
  return Legalized;
}

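// Lower bit counting operations (G_CTLZ, G_CTTZ and their ZERO_UNDEF variants)
// that the target does not handle directly, either in terms of a supported
// counterpart or with an explicit expansion based on G_CTPOP or G_CTLZ.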
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  unsigned Opc = MI.getOpcode();
  auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
  auto isSupported = [this](const LegalityQuery &Q) {
    auto QAction = LI.getAction(Q).Action;
    return QAction == Legal || QAction == Libcall || QAction == Custom;
  };
  switch (Opc) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // This trivially expands to CTLZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTLZ: {
    unsigned SrcReg = MI.getOperand(1).getReg();
    unsigned Len = Ty.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty}})) {
      // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
      auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
                                             {Ty}, {SrcReg});
      auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
      auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
      auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                          SrcReg, MIBZero);
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
                             MIBCtlzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // For now, we do this:
    // NewLen = NextPowerOf2(Len);
    // x = x | (x >> 1);
    // x = x | (x >> 2);
    // ...
    // x = x | (x >> 16);
    // x = x | (x >> 32); // for 64-bit input
    // with shift amounts up to NewLen/2, then
    // return Len - popcount(x);
    //
    // Ref: "Hacker's Delight" by Henry Warren
    unsigned Op = SrcReg;
    unsigned NewLen = PowerOf2Ceil(Len);
    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
      auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
      auto MIBOp = MIRBuilder.buildInstr(
          TargetOpcode::G_OR, {Ty},
          {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty},
                                     {Op, MIBShiftAmt})});
      Op = MIBOp->getOperand(0).getReg();
    }
    auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op});
    MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
                          {MIRBuilder.buildConstant(Ty, Len), MIBPop});
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    // This trivially expands to CTTZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTTZ: {
    unsigned SrcReg = MI.getOperand(1).getReg();
    unsigned Len = Ty.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty}})) {
      // If CTTZ_ZERO_UNDEF is supported, emit that and a select for zero.
      auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
                                             {Ty}, {SrcReg});
      auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
      auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
      auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                          SrcReg, MIBZero);
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
                             MIBCttzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // For now, we use: { return popcount(~x & (x - 1)); }
    // unless the target has ctlz but not ctpop, in which case we use:
    // { return Len - ctlz(~x & (x - 1)); }
    // Ref: "Hacker's Delight" by Henry Warren
    auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
    auto MIBNot =
        MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1});
    auto MIBTmp = MIRBuilder.buildInstr(
        TargetOpcode::G_AND, {Ty},
        {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
                                       {SrcReg, MIBCstNeg1})});
    if (!isSupported({TargetOpcode::G_CTPOP, {Ty}}) &&
        isSupported({TargetOpcode::G_CTLZ, {Ty}})) {
      auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
      MIRBuilder.buildInstr(
          TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
          {MIBCstLen,
           MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})});
      MI.eraseFromParent();
      return Legalized;
    }
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
    MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
    Observer.changedInstr(MI);
    return Legalized;
  }
  }
}