1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file This file implements the LegalizerHelper class to legalize
11 /// individual instructions and the LegalizeMachineIR wrapper pass for the
12 /// primary legalization.
13 //
14 //===----------------------------------------------------------------------===//
15
16 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
17 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
18 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
19 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
20 #include "llvm/CodeGen/MachineRegisterInfo.h"
21 #include "llvm/CodeGen/TargetInstrInfo.h"
22 #include "llvm/CodeGen/TargetLowering.h"
23 #include "llvm/CodeGen/TargetSubtargetInfo.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
27
28 #define DEBUG_TYPE "legalizer"
29
30 using namespace llvm;
31 using namespace LegalizeActions;
32
// Construct a LegalizerHelper that looks up the LegalizerInfo from MF's
// subtarget. Binds the builder to MF and installs the change observer so
// every instruction the builder creates or mutates is reported.
// NOTE: setMF must run before setChangeObserver — setMF (re)initializes the
// builder's state for this function.
LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &Builder)
    : MIRBuilder(Builder), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
  MIRBuilder.setMF(MF);
  MIRBuilder.setChangeObserver(Observer);
}
41
// Variant that takes an explicit LegalizerInfo instead of querying the
// subtarget — useful when the caller wants non-default legalization rules.
// Otherwise identical setup to the primary constructor.
LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &B)
    : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
  MIRBuilder.setMF(MF);
  MIRBuilder.setChangeObserver(Observer);
}
// Perform exactly one legalization step on MI: ask the target's
// LegalizerInfo which action applies and dispatch to the matching
// transformation. The caller (the Legalizer pass) iterates until every
// instruction is reported Legal or a step fails.
LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
  LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));

  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case Legal:
    LLVM_DEBUG(dbgs() << ".. Already legal\n");
    return AlreadyLegal;
  case Libcall:
    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
    return libcall(MI);
  case NarrowScalar:
    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
    return narrowScalar(MI, Step.TypeIdx, Step.NewType);
  case WidenScalar:
    LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
    return widenScalar(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    LLVM_DEBUG(dbgs() << ".. Lower\n");
    return lower(MI, Step.TypeIdx, Step.NewType);
  case FewerElements:
    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
  case Custom:
    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
    // Targets implement legalizeCustom; a false return means the target
    // could not handle this instruction after all.
    return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
                                                            : UnableToLegalize;
  default:
    LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
    return UnableToLegalize;
  }
}
82
extractParts(unsigned Reg,LLT Ty,int NumParts,SmallVectorImpl<unsigned> & VRegs)83 void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts,
84 SmallVectorImpl<unsigned> &VRegs) {
85 for (int i = 0; i < NumParts; ++i)
86 VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
87 MIRBuilder.buildUnmerge(VRegs, Reg);
88 }
89
getRTLibDesc(unsigned Opcode,unsigned Size)90 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
91 switch (Opcode) {
92 case TargetOpcode::G_SDIV:
93 assert((Size == 32 || Size == 64) && "Unsupported size");
94 return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32;
95 case TargetOpcode::G_UDIV:
96 assert((Size == 32 || Size == 64) && "Unsupported size");
97 return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32;
98 case TargetOpcode::G_SREM:
99 assert((Size == 32 || Size == 64) && "Unsupported size");
100 return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
101 case TargetOpcode::G_UREM:
102 assert((Size == 32 || Size == 64) && "Unsupported size");
103 return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32;
104 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
105 assert(Size == 32 && "Unsupported size");
106 return RTLIB::CTLZ_I32;
107 case TargetOpcode::G_FADD:
108 assert((Size == 32 || Size == 64) && "Unsupported size");
109 return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
110 case TargetOpcode::G_FSUB:
111 assert((Size == 32 || Size == 64) && "Unsupported size");
112 return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
113 case TargetOpcode::G_FMUL:
114 assert((Size == 32 || Size == 64) && "Unsupported size");
115 return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
116 case TargetOpcode::G_FDIV:
117 assert((Size == 32 || Size == 64) && "Unsupported size");
118 return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
119 case TargetOpcode::G_FREM:
120 return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
121 case TargetOpcode::G_FPOW:
122 return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
123 case TargetOpcode::G_FMA:
124 assert((Size == 32 || Size == 64) && "Unsupported size");
125 return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
126 }
127 llvm_unreachable("Unknown libcall function");
128 }
129
130 LegalizerHelper::LegalizeResult
createLibcall(MachineIRBuilder & MIRBuilder,RTLIB::Libcall Libcall,const CallLowering::ArgInfo & Result,ArrayRef<CallLowering::ArgInfo> Args)131 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
132 const CallLowering::ArgInfo &Result,
133 ArrayRef<CallLowering::ArgInfo> Args) {
134 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
135 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
136 const char *Name = TLI.getLibcallName(Libcall);
137
138 MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
139 if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall),
140 MachineOperand::CreateES(Name), Result, Args))
141 return LegalizerHelper::UnableToLegalize;
142
143 return LegalizerHelper::Legalized;
144 }
145
146 // Useful for libcalls where all operands have the same type.
147 static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr & MI,MachineIRBuilder & MIRBuilder,unsigned Size,Type * OpType)148 simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
149 Type *OpType) {
150 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
151
152 SmallVector<CallLowering::ArgInfo, 3> Args;
153 for (unsigned i = 1; i < MI.getNumOperands(); i++)
154 Args.push_back({MI.getOperand(i).getReg(), OpType});
155 return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
156 Args);
157 }
158
getConvRTLibDesc(unsigned Opcode,Type * ToType,Type * FromType)159 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
160 Type *FromType) {
161 auto ToMVT = MVT::getVT(ToType);
162 auto FromMVT = MVT::getVT(FromType);
163
164 switch (Opcode) {
165 case TargetOpcode::G_FPEXT:
166 return RTLIB::getFPEXT(FromMVT, ToMVT);
167 case TargetOpcode::G_FPTRUNC:
168 return RTLIB::getFPROUND(FromMVT, ToMVT);
169 case TargetOpcode::G_FPTOSI:
170 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
171 case TargetOpcode::G_FPTOUI:
172 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
173 case TargetOpcode::G_SITOFP:
174 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
175 case TargetOpcode::G_UITOFP:
176 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
177 }
178 llvm_unreachable("Unsupported libcall function");
179 }
180
181 static LegalizerHelper::LegalizeResult
conversionLibcall(MachineInstr & MI,MachineIRBuilder & MIRBuilder,Type * ToType,Type * FromType)182 conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
183 Type *FromType) {
184 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
185 return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
186 {{MI.getOperand(1).getReg(), FromType}});
187 }
188
// Replace MI with a call to the runtime-library routine implementing it.
// Only the opcode/width combinations below are supported; everything else
// (including the FIXMEs' half/fp128 cases) reports UnableToLegalize and
// leaves MI untouched. On success MI is erased after the call is emitted.
LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI) {
  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = LLTy.getSizeInBits();
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // Integer ops: all operands share one integer type of the result width.
    Type *HLTy = IntegerType::get(Ctx, Size);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM: {
    // FP ops: pick float or double by the result width (32 or 64 bits).
    Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPEXT: {
    // FIXME: Support other floating point types (half, fp128 etc)
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    // Only float -> double is handled.
    if (ToSize != 64 || FromSize != 32)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTRUNC: {
    // FIXME: Support other floating point types (half, fp128 etc)
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    // Only double -> float is handled.
    if (ToSize != 32 || FromSize != 64)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    // Only f32/f64 -> i32 is handled.
    if (ToSize != 32 || (FromSize != 32 && FromSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getInt32Ty(Ctx),
        FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    // Only i32 -> f32/f64 is handled.
    if (FromSize != 32 || (ToSize != 32 && ToSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
        Type::getInt32Ty(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  }

  // Every successful path above emitted the replacement call, so the
  // original instruction can now be removed.
  MI.eraseFromParent();
  return Legalized;
}
282
// Break an instruction whose type at \p TypeIdx is too wide into a sequence
// of operations on \p NarrowTy pieces, then recombine the partial results
// into the original destination register (G_MERGE_VALUES for scalars,
// G_BUILD_VECTOR for vectors). Each case currently requires the wide size to
// be an exact multiple of the narrow size.
LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
                                                              unsigned TypeIdx,
                                                              LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0 && MI.getOpcode() != TargetOpcode::G_EXTRACT)
    return UnableToLegalize;

  MIRBuilder.setInstr(MI);

  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_IMPLICIT_DEF: {
    // An undef of the wide type becomes NumParts undefs of the narrow type.
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i)
      DstRegs.push_back(
          MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());

    unsigned DstReg = MI.getOperand(0).getReg();
    if(MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ADD: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    // Expand in terms of carry-setting/consuming G_ADDE instructions.
    int NumParts = SizeOp0 / NarrowTy.getSizeInBits();

    SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);

    // Seed the carry chain with 0 so the first part is a plain add.
    unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
    MIRBuilder.buildConstant(CarryIn, 0);

    // Ripple the carry from the lowest part up to the highest.
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));

      MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
                            Src2Regs[i], CarryIn);

      DstRegs.push_back(DstReg);
      CarryIn = CarryOut;
    }
    unsigned DstReg = MI.getOperand(0).getReg();
    if(MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT: {
    // Narrowing the *source* (TypeIdx 1) of an extract: split the source and
    // re-extract the relevant pieces from each part.
    if (TypeIdx != 1)
      return UnableToLegalize;

    int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    // FIXME: add support for when SizeOp1 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp1 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp1 / NarrowSize;

    SmallVector<unsigned, 2> SrcRegs, DstRegs;
    SmallVector<uint64_t, 2> Indexes;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    unsigned OpReg = MI.getOperand(0).getReg();
    uint64_t OpStart = MI.getOperand(2).getImm();
    uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
    for (int i = 0; i < NumParts; ++i) {
      unsigned SrcStart = i * NarrowSize;

      if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
        // No part of the extract uses this subregister, ignore it.
        continue;
      } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
        // The entire subregister is extracted, forward the value.
        DstRegs.push_back(SrcRegs[i]);
        continue;
      }

      // OpSegStart is where this destination segment would start in OpReg if it
      // extended infinitely in both directions.
      int64_t ExtractOffset;
      uint64_t SegSize;
      if (OpStart < SrcStart) {
        ExtractOffset = 0;
        SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
      } else {
        ExtractOffset = OpStart - SrcStart;
        SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
      }

      unsigned SegReg = SrcRegs[i];
      if (ExtractOffset != 0 || SegSize != NarrowSize) {
        // A genuine extract is needed.
        SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
        MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
      }

      DstRegs.push_back(SegReg);
    }

    unsigned DstReg = MI.getOperand(0).getReg();
    if(MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_INSERT: {
    // Split the destination; each part is either forwarded unchanged, taken
    // wholly from the inserted value, or stitched together with an
    // extract+insert of the overlapping segment.
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;

    SmallVector<unsigned, 2> SrcRegs, DstRegs;
    SmallVector<uint64_t, 2> Indexes;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    unsigned OpReg = MI.getOperand(2).getReg();
    uint64_t OpStart = MI.getOperand(3).getImm();
    uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstStart = i * NarrowSize;

      if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
        // No part of the insert affects this subregister, forward the original.
        DstRegs.push_back(SrcRegs[i]);
        continue;
      } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
        // The entire subregister is defined by this insert, forward the new
        // value.
        DstRegs.push_back(OpReg);
        continue;
      }

      // OpSegStart is where this destination segment would start in OpReg if it
      // extended infinitely in both directions.
      int64_t ExtractOffset, InsertOffset;
      uint64_t SegSize;
      if (OpStart < DstStart) {
        InsertOffset = 0;
        ExtractOffset = DstStart - OpStart;
        SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
      } else {
        InsertOffset = OpStart - DstStart;
        ExtractOffset = 0;
        SegSize =
            std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
      }

      unsigned SegReg = OpReg;
      if (ExtractOffset != 0 || SegSize != OpSize) {
        // A genuine extract is needed.
        SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
        MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
      }

      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
      DstRegs.push_back(DstReg);
    }

    assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
    unsigned DstReg = MI.getOperand(0).getReg();
    if(MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD: {
    // Split a wide load into NumParts narrow loads at increasing byte
    // offsets, each with its own MachineMemOperand.
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    const auto &MMO = **MI.memoperands_begin();
    // This implementation doesn't work for atomics. Give up instead of doing
    // something invalid.
    if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
        MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;
    LLT OffsetTy = LLT::scalar(
        MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());

    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned SrcReg = 0;
      unsigned Adjustment = i * NarrowSize / 8;
      // Alignment of part i is limited by its offset from the base address.
      unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);

      MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
          MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
          NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
          MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());

      // materializeGEP defines SrcReg = base + Adjustment.
      MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy,
                                Adjustment);

      MIRBuilder.buildLoad(DstReg, SrcReg, *SplitMMO);

      DstRegs.push_back(DstReg);
    }
    unsigned DstReg = MI.getOperand(0).getReg();
    if(MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    // Mirror of the G_LOAD case: unmerge the value and emit NumParts narrow
    // stores at increasing byte offsets.
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    const auto &MMO = **MI.memoperands_begin();
    // This implementation doesn't work for atomics. Give up instead of doing
    // something invalid.
    if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
        MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;
    LLT OffsetTy = LLT::scalar(
        MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());

    SmallVector<unsigned, 2> SrcRegs;
    extractParts(MI.getOperand(0).getReg(), NarrowTy, NumParts, SrcRegs);

    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = 0;
      unsigned Adjustment = i * NarrowSize / 8;
      unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);

      MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
          MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
          NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
          MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());

      MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy,
                                Adjustment);

      MIRBuilder.buildStore(SrcRegs[i], DstReg, *SplitMMO);
    }
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    // Split the immediate: part i holds bits [i*NarrowSize, (i+1)*NarrowSize).
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;
    const APInt &Cst = MI.getOperand(1).getCImm()->getValue();
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();

    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      ConstantInt *CI =
          ConstantInt::get(Ctx, Cst.lshr(NarrowSize * i).trunc(NarrowSize));
      MIRBuilder.buildConstant(DstReg, *CI);
      DstRegs.push_back(DstReg);
    }
    unsigned DstReg = MI.getOperand(0).getReg();
    if(MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // Legalize bitwise operation:
    // A = BinOp<Ty> B, C
    // into:
    // B1, ..., BN = G_UNMERGE_VALUES B
    // C1, ..., CN = G_UNMERGE_VALUES C
    // A1 = BinOp<Ty/N> B1, C2
    // ...
    // AN = BinOp<Ty/N> BN, CN
    // A = G_MERGE_VALUES A1, ..., AN

    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    // List the registers where the destination will be scattered.
    SmallVector<unsigned, 2> DstRegs;
    // List the registers where the first argument will be split.
    SmallVector<unsigned, 2> SrcsReg1;
    // List the registers where the second argument will be split.
    SmallVector<unsigned, 2> SrcsReg2;
    // Create all the temporary registers.
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy);

      DstRegs.push_back(DstReg);
      SrcsReg1.push_back(SrcReg1);
      SrcsReg2.push_back(SrcReg2);
    }
    // Explode the big arguments into smaller chunks.
    MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg());
    MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg());

    // Do the operation on each small part.
    for (int i = 0; i < NumParts; ++i)
      MIRBuilder.buildInstr(MI.getOpcode(), {DstRegs[i]},
                            {SrcsReg1[i], SrcsReg2[i]});

    // Gather the destination registers into the final destination.
    unsigned DstReg = MI.getOperand(0).getReg();
    if(MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  }
}
638
widenScalarSrc(MachineInstr & MI,LLT WideTy,unsigned OpIdx,unsigned ExtOpcode)639 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
640 unsigned OpIdx, unsigned ExtOpcode) {
641 MachineOperand &MO = MI.getOperand(OpIdx);
642 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()});
643 MO.setReg(ExtB->getOperand(0).getReg());
644 }
645
// Redirect destination operand \p OpIdx of \p MI to a fresh WideTy register
// and emit a TruncOpcode (default G_TRUNC elsewhere in this file) that
// narrows it back into the original destination register.
void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned TruncOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
  // Bump the insertion point one past MI so the truncate lands *after* the
  // (to-be-widened) instruction rather than before it.
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt});
  MO.setReg(DstExt);
}
654
655 LegalizerHelper::LegalizeResult
widenScalar(MachineInstr & MI,unsigned TypeIdx,LLT WideTy)656 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
657 MIRBuilder.setInstr(MI);
658
659 switch (MI.getOpcode()) {
660 default:
661 return UnableToLegalize;
662 case TargetOpcode::G_UADDO:
663 case TargetOpcode::G_USUBO: {
664 if (TypeIdx == 1)
665 return UnableToLegalize; // TODO
666 auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
667 {MI.getOperand(2).getReg()});
668 auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
669 {MI.getOperand(3).getReg()});
670 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
671 ? TargetOpcode::G_ADD
672 : TargetOpcode::G_SUB;
673 // Do the arithmetic in the larger type.
674 auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
675 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
676 APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits());
677 auto AndOp = MIRBuilder.buildInstr(
678 TargetOpcode::G_AND, {WideTy},
679 {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
680 // There is no overflow if the AndOp is the same as NewOp.
681 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp,
682 AndOp);
683 // Now trunc the NewOp to the original result.
684 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
685 MI.eraseFromParent();
686 return Legalized;
687 }
688 case TargetOpcode::G_CTTZ:
689 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
690 case TargetOpcode::G_CTLZ:
691 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
692 case TargetOpcode::G_CTPOP: {
693 // First ZEXT the input.
694 auto MIBSrc = MIRBuilder.buildZExt(WideTy, MI.getOperand(1).getReg());
695 LLT CurTy = MRI.getType(MI.getOperand(0).getReg());
696 if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
697 // The count is the same in the larger type except if the original
698 // value was zero. This can be handled by setting the bit just off
699 // the top of the original type.
700 auto TopBit =
701 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
702 MIBSrc = MIRBuilder.buildInstr(
703 TargetOpcode::G_OR, {WideTy},
704 {MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit.getSExtValue())});
705 }
706 // Perform the operation at the larger size.
707 auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
708 // This is already the correct result for CTPOP and CTTZs
709 if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
710 MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
711 // The correct result is NewOp - (Difference in widety and current ty).
712 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
713 MIBNewOp = MIRBuilder.buildInstr(
714 TargetOpcode::G_SUB, {WideTy},
715 {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
716 }
717 auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
718 // Make the original instruction a trunc now, and update its source.
719 Observer.changingInstr(MI);
720 MI.setDesc(TII.get(TargetOpcode::G_TRUNC));
721 MI.getOperand(1).setReg(MIBNewOp->getOperand(0).getReg());
722 Observer.changedInstr(MI);
723 return Legalized;
724 }
725
726 case TargetOpcode::G_ADD:
727 case TargetOpcode::G_AND:
728 case TargetOpcode::G_MUL:
729 case TargetOpcode::G_OR:
730 case TargetOpcode::G_XOR:
731 case TargetOpcode::G_SUB:
732 // Perform operation at larger width (any extension is fine here, high bits
733 // don't affect the result) and then truncate the result back to the
734 // original type.
735 Observer.changingInstr(MI);
736 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
737 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
738 widenScalarDst(MI, WideTy);
739 Observer.changedInstr(MI);
740 return Legalized;
741
742 case TargetOpcode::G_SHL:
743 Observer.changingInstr(MI);
744 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
745 // The "number of bits to shift" operand must preserve its value as an
746 // unsigned integer:
747 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
748 widenScalarDst(MI, WideTy);
749 Observer.changedInstr(MI);
750 return Legalized;
751
752 case TargetOpcode::G_SDIV:
753 case TargetOpcode::G_SREM:
754 Observer.changingInstr(MI);
755 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
756 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
757 widenScalarDst(MI, WideTy);
758 Observer.changedInstr(MI);
759 return Legalized;
760
761 case TargetOpcode::G_ASHR:
762 Observer.changingInstr(MI);
763 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
764 // The "number of bits to shift" operand must preserve its value as an
765 // unsigned integer:
766 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
767 widenScalarDst(MI, WideTy);
768 Observer.changedInstr(MI);
769 return Legalized;
770
771 case TargetOpcode::G_UDIV:
772 case TargetOpcode::G_UREM:
773 case TargetOpcode::G_LSHR:
774 Observer.changingInstr(MI);
775 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
776 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
777 widenScalarDst(MI, WideTy);
778 Observer.changedInstr(MI);
779 return Legalized;
780
781 case TargetOpcode::G_SELECT:
782 Observer.changingInstr(MI);
783 if (TypeIdx == 0) {
784 // Perform operation at larger width (any extension is fine here, high
785 // bits don't affect the result) and then truncate the result back to the
786 // original type.
787 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
788 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
789 widenScalarDst(MI, WideTy);
790 } else {
791 // Explicit extension is required here since high bits affect the result.
792 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
793 }
794 Observer.changedInstr(MI);
795 return Legalized;
796
797 case TargetOpcode::G_FPTOSI:
798 case TargetOpcode::G_FPTOUI:
799 if (TypeIdx != 0)
800 return UnableToLegalize;
801 Observer.changingInstr(MI);
802 widenScalarDst(MI, WideTy);
803 Observer.changedInstr(MI);
804 return Legalized;
805
806 case TargetOpcode::G_SITOFP:
807 if (TypeIdx != 1)
808 return UnableToLegalize;
809 Observer.changingInstr(MI);
810 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
811 Observer.changedInstr(MI);
812 return Legalized;
813
814 case TargetOpcode::G_UITOFP:
815 if (TypeIdx != 1)
816 return UnableToLegalize;
817 Observer.changingInstr(MI);
818 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
819 Observer.changedInstr(MI);
820 return Legalized;
821
822 case TargetOpcode::G_INSERT:
823 if (TypeIdx != 0)
824 return UnableToLegalize;
825 Observer.changingInstr(MI);
826 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
827 widenScalarDst(MI, WideTy);
828 Observer.changedInstr(MI);
829 return Legalized;
830
831 case TargetOpcode::G_LOAD:
832 // For some types like i24, we might try to widen to i32. To properly handle
833 // this we should be using a dedicated extending load, until then avoid
834 // trying to legalize.
835 if (alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) !=
836 WideTy.getSizeInBits())
837 return UnableToLegalize;
838 LLVM_FALLTHROUGH;
839 case TargetOpcode::G_SEXTLOAD:
840 case TargetOpcode::G_ZEXTLOAD:
841 Observer.changingInstr(MI);
842 widenScalarDst(MI, WideTy);
843 Observer.changedInstr(MI);
844 return Legalized;
845
846 case TargetOpcode::G_STORE: {
847 if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(1) ||
848 WideTy != LLT::scalar(8))
849 return UnableToLegalize;
850
851 Observer.changingInstr(MI);
852 widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ZEXT);
853 Observer.changedInstr(MI);
854 return Legalized;
855 }
856 case TargetOpcode::G_CONSTANT: {
857 MachineOperand &SrcMO = MI.getOperand(1);
858 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
859 const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
860 Observer.changingInstr(MI);
861 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
862
863 widenScalarDst(MI, WideTy);
864 Observer.changedInstr(MI);
865 return Legalized;
866 }
867 case TargetOpcode::G_FCONSTANT: {
868 MachineOperand &SrcMO = MI.getOperand(1);
869 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
870 APFloat Val = SrcMO.getFPImm()->getValueAPF();
871 bool LosesInfo;
872 switch (WideTy.getSizeInBits()) {
873 case 32:
874 Val.convert(APFloat::IEEEsingle(), APFloat::rmTowardZero, &LosesInfo);
875 break;
876 case 64:
877 Val.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &LosesInfo);
878 break;
879 default:
880 llvm_unreachable("Unhandled fp widen type");
881 }
882 Observer.changingInstr(MI);
883 SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
884
885 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
886 Observer.changedInstr(MI);
887 return Legalized;
888 }
889 case TargetOpcode::G_IMPLICIT_DEF: {
890 Observer.changingInstr(MI);
891 widenScalarDst(MI, WideTy);
892 Observer.changedInstr(MI);
893 return Legalized;
894 }
895 case TargetOpcode::G_BRCOND:
896 Observer.changingInstr(MI);
897 widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
898 Observer.changedInstr(MI);
899 return Legalized;
900
901 case TargetOpcode::G_FCMP:
902 Observer.changingInstr(MI);
903 if (TypeIdx == 0)
904 widenScalarDst(MI, WideTy);
905 else {
906 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
907 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
908 }
909 Observer.changedInstr(MI);
910 return Legalized;
911
912 case TargetOpcode::G_ICMP:
913 Observer.changingInstr(MI);
914 if (TypeIdx == 0)
915 widenScalarDst(MI, WideTy);
916 else {
917 unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
918 MI.getOperand(1).getPredicate()))
919 ? TargetOpcode::G_SEXT
920 : TargetOpcode::G_ZEXT;
921 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
922 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
923 }
924 Observer.changedInstr(MI);
925 return Legalized;
926
927 case TargetOpcode::G_GEP:
928 assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
929 Observer.changingInstr(MI);
930 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
931 Observer.changedInstr(MI);
932 return Legalized;
933
934 case TargetOpcode::G_PHI: {
935 assert(TypeIdx == 0 && "Expecting only Idx 0");
936
937 Observer.changingInstr(MI);
938 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
939 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
940 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
941 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
942 }
943
944 MachineBasicBlock &MBB = *MI.getParent();
945 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
946 widenScalarDst(MI, WideTy);
947 Observer.changedInstr(MI);
948 return Legalized;
949 }
950 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
951 if (TypeIdx != 2)
952 return UnableToLegalize;
953 Observer.changingInstr(MI);
954 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
955 Observer.changedInstr(MI);
956 return Legalized;
957
958 case TargetOpcode::G_FCEIL:
959 if (TypeIdx != 0)
960 return UnableToLegalize;
961 Observer.changingInstr(MI);
962 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
963 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
964 Observer.changedInstr(MI);
965 return Legalized;
966 }
967 }
968
969 LegalizerHelper::LegalizeResult
lower(MachineInstr & MI,unsigned TypeIdx,LLT Ty)970 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
971 using namespace TargetOpcode;
972 MIRBuilder.setInstr(MI);
973
974 switch(MI.getOpcode()) {
975 default:
976 return UnableToLegalize;
977 case TargetOpcode::G_SREM:
978 case TargetOpcode::G_UREM: {
979 unsigned QuotReg = MRI.createGenericVirtualRegister(Ty);
980 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
981 .addDef(QuotReg)
982 .addUse(MI.getOperand(1).getReg())
983 .addUse(MI.getOperand(2).getReg());
984
985 unsigned ProdReg = MRI.createGenericVirtualRegister(Ty);
986 MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
987 MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
988 ProdReg);
989 MI.eraseFromParent();
990 return Legalized;
991 }
992 case TargetOpcode::G_SMULO:
993 case TargetOpcode::G_UMULO: {
994 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
995 // result.
996 unsigned Res = MI.getOperand(0).getReg();
997 unsigned Overflow = MI.getOperand(1).getReg();
998 unsigned LHS = MI.getOperand(2).getReg();
999 unsigned RHS = MI.getOperand(3).getReg();
1000
1001 MIRBuilder.buildMul(Res, LHS, RHS);
1002
1003 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
1004 ? TargetOpcode::G_SMULH
1005 : TargetOpcode::G_UMULH;
1006
1007 unsigned HiPart = MRI.createGenericVirtualRegister(Ty);
1008 MIRBuilder.buildInstr(Opcode)
1009 .addDef(HiPart)
1010 .addUse(LHS)
1011 .addUse(RHS);
1012
1013 unsigned Zero = MRI.createGenericVirtualRegister(Ty);
1014 MIRBuilder.buildConstant(Zero, 0);
1015
1016 // For *signed* multiply, overflow is detected by checking:
1017 // (hi != (lo >> bitwidth-1))
1018 if (Opcode == TargetOpcode::G_SMULH) {
1019 unsigned Shifted = MRI.createGenericVirtualRegister(Ty);
1020 unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty);
1021 MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
1022 MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
1023 .addDef(Shifted)
1024 .addUse(Res)
1025 .addUse(ShiftAmt);
1026 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
1027 } else {
1028 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
1029 }
1030 MI.eraseFromParent();
1031 return Legalized;
1032 }
1033 case TargetOpcode::G_FNEG: {
1034 // TODO: Handle vector types once we are able to
1035 // represent them.
1036 if (Ty.isVector())
1037 return UnableToLegalize;
1038 unsigned Res = MI.getOperand(0).getReg();
1039 Type *ZeroTy;
1040 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
1041 switch (Ty.getSizeInBits()) {
1042 case 16:
1043 ZeroTy = Type::getHalfTy(Ctx);
1044 break;
1045 case 32:
1046 ZeroTy = Type::getFloatTy(Ctx);
1047 break;
1048 case 64:
1049 ZeroTy = Type::getDoubleTy(Ctx);
1050 break;
1051 case 128:
1052 ZeroTy = Type::getFP128Ty(Ctx);
1053 break;
1054 default:
1055 llvm_unreachable("unexpected floating-point type");
1056 }
1057 ConstantFP &ZeroForNegation =
1058 *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
1059 auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
1060 MIRBuilder.buildInstr(TargetOpcode::G_FSUB)
1061 .addDef(Res)
1062 .addUse(Zero->getOperand(0).getReg())
1063 .addUse(MI.getOperand(1).getReg());
1064 MI.eraseFromParent();
1065 return Legalized;
1066 }
1067 case TargetOpcode::G_FSUB: {
1068 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
1069 // First, check if G_FNEG is marked as Lower. If so, we may
1070 // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
1071 if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
1072 return UnableToLegalize;
1073 unsigned Res = MI.getOperand(0).getReg();
1074 unsigned LHS = MI.getOperand(1).getReg();
1075 unsigned RHS = MI.getOperand(2).getReg();
1076 unsigned Neg = MRI.createGenericVirtualRegister(Ty);
1077 MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
1078 MIRBuilder.buildInstr(TargetOpcode::G_FADD)
1079 .addDef(Res)
1080 .addUse(LHS)
1081 .addUse(Neg);
1082 MI.eraseFromParent();
1083 return Legalized;
1084 }
1085 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1086 unsigned OldValRes = MI.getOperand(0).getReg();
1087 unsigned SuccessRes = MI.getOperand(1).getReg();
1088 unsigned Addr = MI.getOperand(2).getReg();
1089 unsigned CmpVal = MI.getOperand(3).getReg();
1090 unsigned NewVal = MI.getOperand(4).getReg();
1091 MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
1092 **MI.memoperands_begin());
1093 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
1094 MI.eraseFromParent();
1095 return Legalized;
1096 }
1097 case TargetOpcode::G_LOAD:
1098 case TargetOpcode::G_SEXTLOAD:
1099 case TargetOpcode::G_ZEXTLOAD: {
1100 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
1101 unsigned DstReg = MI.getOperand(0).getReg();
1102 unsigned PtrReg = MI.getOperand(1).getReg();
1103 LLT DstTy = MRI.getType(DstReg);
1104 auto &MMO = **MI.memoperands_begin();
1105
1106 if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) {
1107 // In the case of G_LOAD, this was a non-extending load already and we're
1108 // about to lower to the same instruction.
1109 if (MI.getOpcode() == TargetOpcode::G_LOAD)
1110 return UnableToLegalize;
1111 MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
1112 MI.eraseFromParent();
1113 return Legalized;
1114 }
1115
1116 if (DstTy.isScalar()) {
1117 unsigned TmpReg = MRI.createGenericVirtualRegister(
1118 LLT::scalar(MMO.getSize() /* in bytes */ * 8));
1119 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1120 switch (MI.getOpcode()) {
1121 default:
1122 llvm_unreachable("Unexpected opcode");
1123 case TargetOpcode::G_LOAD:
1124 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1125 break;
1126 case TargetOpcode::G_SEXTLOAD:
1127 MIRBuilder.buildSExt(DstReg, TmpReg);
1128 break;
1129 case TargetOpcode::G_ZEXTLOAD:
1130 MIRBuilder.buildZExt(DstReg, TmpReg);
1131 break;
1132 }
1133 MI.eraseFromParent();
1134 return Legalized;
1135 }
1136
1137 return UnableToLegalize;
1138 }
1139 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1140 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1141 case TargetOpcode::G_CTLZ:
1142 case TargetOpcode::G_CTTZ:
1143 case TargetOpcode::G_CTPOP:
1144 return lowerBitCount(MI, TypeIdx, Ty);
1145 case G_UADDE: {
1146 unsigned Res = MI.getOperand(0).getReg();
1147 unsigned CarryOut = MI.getOperand(1).getReg();
1148 unsigned LHS = MI.getOperand(2).getReg();
1149 unsigned RHS = MI.getOperand(3).getReg();
1150 unsigned CarryIn = MI.getOperand(4).getReg();
1151
1152 unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
1153 unsigned ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);
1154
1155 MIRBuilder.buildAdd(TmpRes, LHS, RHS);
1156 MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
1157 MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
1158 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
1159
1160 MI.eraseFromParent();
1161 return Legalized;
1162 }
1163 }
1164 }
1165
1166 LegalizerHelper::LegalizeResult
fewerElementsVector(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)1167 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
1168 LLT NarrowTy) {
1169 // FIXME: Don't know how to handle secondary types yet.
1170 if (TypeIdx != 0)
1171 return UnableToLegalize;
1172
1173 MIRBuilder.setInstr(MI);
1174 switch (MI.getOpcode()) {
1175 default:
1176 return UnableToLegalize;
1177 case TargetOpcode::G_IMPLICIT_DEF: {
1178 SmallVector<unsigned, 2> DstRegs;
1179
1180 unsigned NarrowSize = NarrowTy.getSizeInBits();
1181 unsigned DstReg = MI.getOperand(0).getReg();
1182 unsigned Size = MRI.getType(DstReg).getSizeInBits();
1183 int NumParts = Size / NarrowSize;
1184 // FIXME: Don't know how to handle the situation where the small vectors
1185 // aren't all the same size yet.
1186 if (Size % NarrowSize != 0)
1187 return UnableToLegalize;
1188
1189 for (int i = 0; i < NumParts; ++i) {
1190 unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1191 MIRBuilder.buildUndef(TmpReg);
1192 DstRegs.push_back(TmpReg);
1193 }
1194
1195 if (NarrowTy.isVector())
1196 MIRBuilder.buildConcatVectors(DstReg, DstRegs);
1197 else
1198 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1199
1200 MI.eraseFromParent();
1201 return Legalized;
1202 }
1203 case TargetOpcode::G_ADD: {
1204 unsigned NarrowSize = NarrowTy.getSizeInBits();
1205 unsigned DstReg = MI.getOperand(0).getReg();
1206 unsigned Size = MRI.getType(DstReg).getSizeInBits();
1207 int NumParts = Size / NarrowSize;
1208 // FIXME: Don't know how to handle the situation where the small vectors
1209 // aren't all the same size yet.
1210 if (Size % NarrowSize != 0)
1211 return UnableToLegalize;
1212
1213 SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
1214 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
1215 extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
1216
1217 for (int i = 0; i < NumParts; ++i) {
1218 unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
1219 MIRBuilder.buildAdd(DstReg, Src1Regs[i], Src2Regs[i]);
1220 DstRegs.push_back(DstReg);
1221 }
1222
1223 MIRBuilder.buildConcatVectors(DstReg, DstRegs);
1224 MI.eraseFromParent();
1225 return Legalized;
1226 }
1227 case TargetOpcode::G_LOAD:
1228 case TargetOpcode::G_STORE: {
1229 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
1230 unsigned ValReg = MI.getOperand(0).getReg();
1231 unsigned AddrReg = MI.getOperand(1).getReg();
1232 unsigned NarrowSize = NarrowTy.getSizeInBits();
1233 unsigned Size = MRI.getType(ValReg).getSizeInBits();
1234 unsigned NumParts = Size / NarrowSize;
1235
1236 SmallVector<unsigned, 8> NarrowRegs;
1237 if (!IsLoad)
1238 extractParts(ValReg, NarrowTy, NumParts, NarrowRegs);
1239
1240 const LLT OffsetTy =
1241 LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
1242 MachineFunction &MF = *MI.getMF();
1243 MachineMemOperand *MMO = *MI.memoperands_begin();
1244 for (unsigned Idx = 0; Idx < NumParts; ++Idx) {
1245 unsigned Adjustment = Idx * NarrowTy.getSizeInBits() / 8;
1246 unsigned Alignment = MinAlign(MMO->getAlignment(), Adjustment);
1247 unsigned NewAddrReg = 0;
1248 MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, Adjustment);
1249 MachineMemOperand &NewMMO = *MF.getMachineMemOperand(
1250 MMO->getPointerInfo().getWithOffset(Adjustment), MMO->getFlags(),
1251 NarrowTy.getSizeInBits() / 8, Alignment);
1252 if (IsLoad) {
1253 unsigned Dst = MRI.createGenericVirtualRegister(NarrowTy);
1254 NarrowRegs.push_back(Dst);
1255 MIRBuilder.buildLoad(Dst, NewAddrReg, NewMMO);
1256 } else {
1257 MIRBuilder.buildStore(NarrowRegs[Idx], NewAddrReg, NewMMO);
1258 }
1259 }
1260 if (IsLoad) {
1261 if (NarrowTy.isVector())
1262 MIRBuilder.buildConcatVectors(ValReg, NarrowRegs);
1263 else
1264 MIRBuilder.buildBuildVector(ValReg, NarrowRegs);
1265 }
1266 MI.eraseFromParent();
1267 return Legalized;
1268 }
1269 }
1270 }
1271
1272 LegalizerHelper::LegalizeResult
lowerBitCount(MachineInstr & MI,unsigned TypeIdx,LLT Ty)1273 LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
1274 unsigned Opc = MI.getOpcode();
1275 auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
1276 auto isSupported = [this](const LegalityQuery &Q) {
1277 auto QAction = LI.getAction(Q).Action;
1278 return QAction == Legal || QAction == Libcall || QAction == Custom;
1279 };
1280 switch (Opc) {
1281 default:
1282 return UnableToLegalize;
1283 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1284 // This trivially expands to CTLZ.
1285 Observer.changingInstr(MI);
1286 MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
1287 Observer.changedInstr(MI);
1288 return Legalized;
1289 }
1290 case TargetOpcode::G_CTLZ: {
1291 unsigned SrcReg = MI.getOperand(1).getReg();
1292 unsigned Len = Ty.getSizeInBits();
1293 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty}})) {
1294 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
1295 auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
1296 {Ty}, {SrcReg});
1297 auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
1298 auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
1299 auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
1300 SrcReg, MIBZero);
1301 MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
1302 MIBCtlzZU);
1303 MI.eraseFromParent();
1304 return Legalized;
1305 }
1306 // for now, we do this:
1307 // NewLen = NextPowerOf2(Len);
1308 // x = x | (x >> 1);
1309 // x = x | (x >> 2);
1310 // ...
1311 // x = x | (x >>16);
1312 // x = x | (x >>32); // for 64-bit input
1313 // Upto NewLen/2
1314 // return Len - popcount(x);
1315 //
1316 // Ref: "Hacker's Delight" by Henry Warren
1317 unsigned Op = SrcReg;
1318 unsigned NewLen = PowerOf2Ceil(Len);
1319 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
1320 auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
1321 auto MIBOp = MIRBuilder.buildInstr(
1322 TargetOpcode::G_OR, {Ty},
1323 {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty},
1324 {Op, MIBShiftAmt})});
1325 Op = MIBOp->getOperand(0).getReg();
1326 }
1327 auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op});
1328 MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
1329 {MIRBuilder.buildConstant(Ty, Len), MIBPop});
1330 MI.eraseFromParent();
1331 return Legalized;
1332 }
1333 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
1334 // This trivially expands to CTTZ.
1335 Observer.changingInstr(MI);
1336 MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
1337 Observer.changedInstr(MI);
1338 return Legalized;
1339 }
1340 case TargetOpcode::G_CTTZ: {
1341 unsigned SrcReg = MI.getOperand(1).getReg();
1342 unsigned Len = Ty.getSizeInBits();
1343 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty}})) {
1344 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
1345 // zero.
1346 auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
1347 {Ty}, {SrcReg});
1348 auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
1349 auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
1350 auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
1351 SrcReg, MIBZero);
1352 MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
1353 MIBCttzZU);
1354 MI.eraseFromParent();
1355 return Legalized;
1356 }
1357 // for now, we use: { return popcount(~x & (x - 1)); }
1358 // unless the target has ctlz but not ctpop, in which case we use:
1359 // { return 32 - nlz(~x & (x-1)); }
1360 // Ref: "Hacker's Delight" by Henry Warren
1361 auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
1362 auto MIBNot =
1363 MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1});
1364 auto MIBTmp = MIRBuilder.buildInstr(
1365 TargetOpcode::G_AND, {Ty},
1366 {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
1367 {SrcReg, MIBCstNeg1})});
1368 if (!isSupported({TargetOpcode::G_CTPOP, {Ty}}) &&
1369 isSupported({TargetOpcode::G_CTLZ, {Ty}})) {
1370 auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
1371 MIRBuilder.buildInstr(
1372 TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
1373 {MIBCstLen,
1374 MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})});
1375 MI.eraseFromParent();
1376 return Legalized;
1377 }
1378 MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
1379 MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
1380 return Legalized;
1381 }
1382 }
1383 }
1384