1d8ea85acSTom Stellard //===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
2000c5af3STom Stellard //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6000c5af3STom Stellard //
7000c5af3STom Stellard //===----------------------------------------------------------------------===//
8000c5af3STom Stellard ///
9000c5af3STom Stellard /// \file
10000c5af3STom Stellard /// This file implements the lowering of LLVM calls to machine code calls for
11000c5af3STom Stellard /// GlobalISel.
12000c5af3STom Stellard ///
13000c5af3STom Stellard //===----------------------------------------------------------------------===//
14000c5af3STom Stellard 
15000c5af3STom Stellard #include "AMDGPUCallLowering.h"
16ca16621bSTom Stellard #include "AMDGPU.h"
17000c5af3STom Stellard #include "AMDGPUISelLowering.h"
18ca16621bSTom Stellard #include "AMDGPUSubtarget.h"
19ca16621bSTom Stellard #include "SIISelLowering.h"
20ca16621bSTom Stellard #include "SIMachineFunctionInfo.h"
216bda14b3SChandler Carruth #include "SIRegisterInfo.h"
2244b30b45STom Stellard #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
23206b9927STom Stellard #include "llvm/CodeGen/Analysis.h"
24ca16621bSTom Stellard #include "llvm/CodeGen/CallingConvLower.h"
25000c5af3STom Stellard #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
26000c5af3STom Stellard #include "llvm/CodeGen/MachineInstrBuilder.h"
27206b9927STom Stellard #include "llvm/Support/LowLevelTypeImpl.h"
28000c5af3STom Stellard 
29000c5af3STom Stellard using namespace llvm;
30000c5af3STom Stellard 
31206b9927STom Stellard namespace {
32206b9927STom Stellard 
/// Value handler for outgoing return values: each value is copied into the
/// physical register chosen by the calling convention, and that register is
/// attached as an implicit use of the return instruction. Stack-based
/// placement is not implemented yet.
struct OutgoingArgHandler : public CallLowering::ValueHandler {
  OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                     MachineInstrBuilder MIB, CCAssignFn *AssignFn)
      : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}

  /// The (not yet inserted) return instruction; assigned physical registers
  /// are added to it as implicit uses so they stay live up to the return.
  MachineInstrBuilder MIB;

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    // Outgoing values passed in stack memory are not supported yet.
    llvm_unreachable("not implemented");
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    // Memory-based assignment is not supported yet (see getStackAddress).
    llvm_unreachable("not implemented");
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    // Record the register on the return instruction, then emit the copy
    // from the virtual register into the physical one.
    MIB.addUse(PhysReg);
    MIRBuilder.buildCopy(PhysReg, ValVReg);
  }

  bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo,
                 const CallLowering::ArgInfo &Info,
                 CCState &State) override {
    // Defer entirely to the target-provided calling-convention function.
    return AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
  }
};
63206b9927STom Stellard 
/// Value handler for incoming values: register-passed values are copied out
/// of their physical registers, stack-passed values are loaded from fixed
/// frame objects. Subclasses choose how a consumed physical register is
/// recorded (see markPhysRegUsed).
struct IncomingArgHandler : public CallLowering::ValueHandler {
  // Highest byte past the end of any incoming stack argument seen so far
  // (i.e. max of Size + Offset across getStackAddress calls).
  uint64_t StackUsed = 0;

  IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                     CCAssignFn *AssignFn)
    : ValueHandler(MIRBuilder, MRI, AssignFn) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    // Create a fixed frame object for the incoming slot and return a
    // 32-bit private-address-space pointer to it.
    auto &MFI = MIRBuilder.getMF().getFrameInfo();
    int FI = MFI.CreateFixedObject(Size, Offset, true);
    MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
    Register AddrReg = MRI.createGenericVirtualRegister(
      LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32));
    MIRBuilder.buildFrameIndex(AddrReg, FI);
    StackUsed = std::max(StackUsed, Size + Offset);
    return AddrReg;
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    markPhysRegUsed(PhysReg);

    if (VA.getLocVT().getSizeInBits() < 32) {
      // 16-bit types are reported as legal for 32-bit registers. We need to do
      // a 32-bit copy, and truncate to avoid the verifier complaining about it.
      auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);
      MIRBuilder.buildTrunc(ValVReg, Copy);
      return;
    }

    switch (VA.getLocInfo()) {
    case CCValAssign::LocInfo::SExt:
    case CCValAssign::LocInfo::ZExt:
    case CCValAssign::LocInfo::AExt: {
      // The value arrived extended to the location type: copy at the wide
      // type, then truncate back to the original value type.
      auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
      MIRBuilder.buildTrunc(ValVReg, Copy);
      break;
    }
    default:
      // Value and location types match; a plain copy suffices.
      MIRBuilder.buildCopy(ValVReg, PhysReg);
      break;
    }
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    // FIXME: Get alignment
    // Incoming stack arguments are loaded as invariant (align 1 until the
    // FIXME above is addressed).
    auto MMO = MIRBuilder.getMF().getMachineMemOperand(
      MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size, 1);
    MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
  }

  /// How the physical register gets marked varies between formal
  /// parameters (it's a basic-block live-in), and a call instruction
  /// (it's an implicit-def of the BL).
  virtual void markPhysRegUsed(unsigned PhysReg) = 0;

  // FIXME: What is the point of this being a callback?
  bool isArgumentHandler() const override { return true; }
};
125fecf43ebSMatt Arsenault 
126fecf43ebSMatt Arsenault struct FormalArgHandler : public IncomingArgHandler {
127fecf43ebSMatt Arsenault   FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
128fecf43ebSMatt Arsenault                    CCAssignFn *AssignFn)
129fecf43ebSMatt Arsenault     : IncomingArgHandler(MIRBuilder, MRI, AssignFn) {}
130fecf43ebSMatt Arsenault 
131fecf43ebSMatt Arsenault   void markPhysRegUsed(unsigned PhysReg) override {
132fecf43ebSMatt Arsenault     MIRBuilder.getMBB().addLiveIn(PhysReg);
133fecf43ebSMatt Arsenault   }
134fecf43ebSMatt Arsenault };
135fecf43ebSMatt Arsenault 
136206b9927STom Stellard }
137206b9927STom Stellard 
138000c5af3STom Stellard AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
1390da6350dSMatt Arsenault   : CallLowering(&TLI) {
140000c5af3STom Stellard }
141000c5af3STom Stellard 
142fecf43ebSMatt Arsenault void AMDGPUCallLowering::splitToValueTypes(
143fecf43ebSMatt Arsenault     const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
144fecf43ebSMatt Arsenault     const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv,
145fecf43ebSMatt Arsenault     SplitArgTy PerformArgSplit) const {
146fecf43ebSMatt Arsenault   const SITargetLowering &TLI = *getTLI<SITargetLowering>();
147fecf43ebSMatt Arsenault   LLVMContext &Ctx = OrigArg.Ty->getContext();
148fecf43ebSMatt Arsenault 
149fecf43ebSMatt Arsenault   if (OrigArg.Ty->isVoidTy())
150fecf43ebSMatt Arsenault     return;
151fecf43ebSMatt Arsenault 
152fecf43ebSMatt Arsenault   SmallVector<EVT, 4> SplitVTs;
153fecf43ebSMatt Arsenault   ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs);
154fecf43ebSMatt Arsenault 
155b60a2ae4SMatt Arsenault   assert(OrigArg.Regs.size() == SplitVTs.size());
156b60a2ae4SMatt Arsenault 
157b60a2ae4SMatt Arsenault   int SplitIdx = 0;
158b60a2ae4SMatt Arsenault   for (EVT VT : SplitVTs) {
159fecf43ebSMatt Arsenault     unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT);
160b60a2ae4SMatt Arsenault     Type *Ty = VT.getTypeForEVT(Ctx);
161b60a2ae4SMatt Arsenault 
162b60a2ae4SMatt Arsenault 
163fecf43ebSMatt Arsenault 
164fecf43ebSMatt Arsenault     if (NumParts == 1) {
165fecf43ebSMatt Arsenault       // No splitting to do, but we want to replace the original type (e.g. [1 x
166fecf43ebSMatt Arsenault       // double] -> double).
167b60a2ae4SMatt Arsenault       SplitArgs.emplace_back(OrigArg.Regs[SplitIdx], Ty,
168fecf43ebSMatt Arsenault                              OrigArg.Flags, OrigArg.IsFixed);
169b60a2ae4SMatt Arsenault 
170b60a2ae4SMatt Arsenault       ++SplitIdx;
171b60a2ae4SMatt Arsenault       continue;
172fecf43ebSMatt Arsenault     }
173fecf43ebSMatt Arsenault 
174b60a2ae4SMatt Arsenault     LLT LLTy = getLLTForType(*Ty, DL);
175b60a2ae4SMatt Arsenault 
176fecf43ebSMatt Arsenault     SmallVector<Register, 8> SplitRegs;
177fecf43ebSMatt Arsenault 
178fecf43ebSMatt Arsenault     EVT PartVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);
179fecf43ebSMatt Arsenault     Type *PartTy = PartVT.getTypeForEVT(Ctx);
180fecf43ebSMatt Arsenault     LLT PartLLT = getLLTForType(*PartTy, DL);
181fecf43ebSMatt Arsenault 
182fecf43ebSMatt Arsenault     // FIXME: Should we be reporting all of the part registers for a single
183fecf43ebSMatt Arsenault     // argument, and let handleAssignments take care of the repacking?
184fecf43ebSMatt Arsenault     for (unsigned i = 0; i < NumParts; ++i) {
185fecf43ebSMatt Arsenault       Register PartReg = MRI.createGenericVirtualRegister(PartLLT);
186fecf43ebSMatt Arsenault       SplitRegs.push_back(PartReg);
187fecf43ebSMatt Arsenault       SplitArgs.emplace_back(ArrayRef<Register>(PartReg), PartTy, OrigArg.Flags);
188fecf43ebSMatt Arsenault     }
189fecf43ebSMatt Arsenault 
190b60a2ae4SMatt Arsenault     PerformArgSplit(SplitRegs, LLTy, PartLLT, SplitIdx);
191b60a2ae4SMatt Arsenault 
192b60a2ae4SMatt Arsenault     ++SplitIdx;
193b60a2ae4SMatt Arsenault   }
194fecf43ebSMatt Arsenault }
195fecf43ebSMatt Arsenault 
196000c5af3STom Stellard bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
19749168f67SAlexander Ivchenko                                      const Value *Val,
198e3a676e9SMatt Arsenault                                      ArrayRef<Register> VRegs) const {
199206b9927STom Stellard 
200206b9927STom Stellard   MachineFunction &MF = MIRBuilder.getMF();
201206b9927STom Stellard   MachineRegisterInfo &MRI = MF.getRegInfo();
202206b9927STom Stellard   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
203206b9927STom Stellard   MFI->setIfReturnsVoid(!Val);
204206b9927STom Stellard 
205206b9927STom Stellard   if (!Val) {
206206b9927STom Stellard     MIRBuilder.buildInstr(AMDGPU::S_ENDPGM).addImm(0);
207206b9927STom Stellard     return true;
208206b9927STom Stellard   }
209206b9927STom Stellard 
210e3a676e9SMatt Arsenault   Register VReg = VRegs[0];
211206b9927STom Stellard 
212206b9927STom Stellard   const Function &F = MF.getFunction();
213206b9927STom Stellard   auto &DL = F.getParent()->getDataLayout();
214206b9927STom Stellard   if (!AMDGPU::isShader(F.getCallingConv()))
215257882ffSTom Stellard     return false;
216257882ffSTom Stellard 
217206b9927STom Stellard 
218206b9927STom Stellard   const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
219206b9927STom Stellard   SmallVector<EVT, 4> SplitVTs;
220206b9927STom Stellard   SmallVector<uint64_t, 4> Offsets;
221206b9927STom Stellard   ArgInfo OrigArg{VReg, Val->getType()};
222206b9927STom Stellard   setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
223206b9927STom Stellard   ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
224206b9927STom Stellard 
225206b9927STom Stellard   SmallVector<ArgInfo, 8> SplitArgs;
226206b9927STom Stellard   CCAssignFn *AssignFn = CCAssignFnForReturn(F.getCallingConv(), false);
227206b9927STom Stellard   for (unsigned i = 0, e = Offsets.size(); i != e; ++i) {
228206b9927STom Stellard     Type *SplitTy = SplitVTs[i].getTypeForEVT(F.getContext());
229206b9927STom Stellard     SplitArgs.push_back({VRegs[i], SplitTy, OrigArg.Flags, OrigArg.IsFixed});
230206b9927STom Stellard   }
231206b9927STom Stellard   auto RetInstr = MIRBuilder.buildInstrNoInsert(AMDGPU::SI_RETURN_TO_EPILOG);
232206b9927STom Stellard   OutgoingArgHandler Handler(MIRBuilder, MRI, RetInstr, AssignFn);
233206b9927STom Stellard   if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
234206b9927STom Stellard     return false;
235206b9927STom Stellard   MIRBuilder.insertInstr(RetInstr);
236206b9927STom Stellard 
237000c5af3STom Stellard   return true;
238000c5af3STom Stellard }
239000c5af3STom Stellard 
/// Compute a constant-address-space pointer to a kernel argument: the
/// preloaded kernarg segment base pointer plus \p Offset bytes.
Register AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
                                               Type *ParamTy,
                                               uint64_t Offset) const {

  MachineFunction &MF = MIRBuilder.getMF();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  // Kernel arguments are addressed through the constant address space.
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  LLT PtrType = getLLTForType(*PtrTy, DL);
  Register DstReg = MRI.createGenericVirtualRegister(PtrType);
  // The kernarg segment pointer was preloaded into an SGPR pair; fetch the
  // virtual register its live-in copy was assigned to.
  Register KernArgSegmentPtr =
    MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);

  Register OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
  MIRBuilder.buildConstant(OffsetReg, Offset);

  // DstReg = KernArgSegmentVReg + Offset.
  MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);

  return DstReg;
}
263ca16621bSTom Stellard 
/// Load one kernel argument of type \p ParamTy from the kernarg segment at
/// byte \p Offset (with alignment \p Align) into \p DstReg.
void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
                                        Type *ParamTy, uint64_t Offset,
                                        unsigned Align,
                                        Register DstReg) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
  Register PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);

  // Kernarg loads are dereferenceable and invariant for the duration of the
  // kernel.
  MachineMemOperand *MMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
                                       MachineMemOperand::MODereferenceable |
                                       MachineMemOperand::MOInvariant,
                                       TypeSize, Align);

  MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
}
284ca16621bSTom Stellard 
// Allocate special inputs passed in user SGPRs.
// NOTE: each enabled input is claimed with CCInfo.AllocateReg in sequence,
// so the order of these checks determines which SGPRs each input gets —
// do not reorder.
static void allocateHSAUserSGPRs(CCState &CCInfo,
                                 MachineIRBuilder &MIRBuilder,
                                 MachineFunction &MF,
                                 const SIRegisterInfo &TRI,
                                 SIMachineFunctionInfo &Info) {
  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
  if (Info.hasPrivateSegmentBuffer()) {
    unsigned PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
    CCInfo.AllocateReg(PrivateSegmentBufferReg);
  }

  if (Info.hasDispatchPtr()) {
    unsigned DispatchPtrReg = Info.addDispatchPtr(TRI);
    MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchPtrReg);
  }

  if (Info.hasQueuePtr()) {
    unsigned QueuePtrReg = Info.addQueuePtr(TRI);
    MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(QueuePtrReg);
  }

  if (Info.hasKernargSegmentPtr()) {
    // Unlike the other inputs, the kernarg segment pointer is also copied
    // into a virtual register here, because lowerParameterPtr later fetches
    // it via MRI.getLiveInVirtReg.
    MachineRegisterInfo &MRI = MF.getRegInfo();
    Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
    const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
    Register VReg = MRI.createGenericVirtualRegister(P4);
    MRI.addLiveIn(InputPtrReg, VReg);
    MIRBuilder.getMBB().addLiveIn(InputPtrReg);
    MIRBuilder.buildCopy(VReg, InputPtrReg);
    CCInfo.AllocateReg(InputPtrReg);
  }

  if (Info.hasDispatchID()) {
    unsigned DispatchIDReg = Info.addDispatchID(TRI);
    MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchIDReg);
  }

  if (Info.hasFlatScratchInit()) {
    unsigned FlatScratchInitReg = Info.addFlatScratchInit(TRI);
    MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(FlatScratchInitReg);
  }

  // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
  // these from the dispatch pointer.
}
336bae3636fSMatt Arsenault 
/// Lower formal arguments for an AMDGPU_KERNEL function: allocate the HSA
/// user SGPRs, then emit one kernarg-segment load per IR argument. Kernel
/// arguments are never passed in registers, so no CC-based splitting is
/// needed.
bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
    MachineIRBuilder &MIRBuilder, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();

  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

  allocateHSAUserSGPRs(CCInfo, MIRBuilder, MF, *TRI, *Info);

  unsigned i = 0;
  // The kernarg segment base is 16-byte aligned; per-argument alignment is
  // derived from that and the argument's offset.
  const unsigned KernArgBaseAlign = 16;
  const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
  uint64_t ExplicitArgOffset = 0;

  // TODO: Align down to dword alignment and extract bits for extending loads.
  for (auto &Arg : F.args()) {
    Type *ArgTy = Arg.getType();
    unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
    // Zero-sized arguments occupy no kernarg space; skip them entirely.
    if (AllocSize == 0)
      continue;

    unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);

    // Place this argument at the next ABI-aligned offset, then advance the
    // running offset past it.
    uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
    ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;

    // If the argument was split across multiple virtual registers, load it
    // into one wide temporary and unpack afterwards.
    ArrayRef<Register> OrigArgRegs = VRegs[i];
    Register ArgReg =
      OrigArgRegs.size() == 1
      ? OrigArgRegs[0]
      : MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL));
    unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
    // NOTE(review): ArgOffset was already ABI-aligned above (modulo
    // BaseOffset); this second alignTo looks redundant — confirm.
    ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
    lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, ArgReg);
    if (OrigArgRegs.size() > 1)
      unpackRegs(OrigArgRegs, ArgReg, ArgTy, MIRBuilder);
    ++i;
  }

  TLI.allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
  TLI.allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
  return true;
}
38829f30379SMatt Arsenault 
/// Reassemble the CC-split part registers \p Regs (each of type \p PartLLT)
/// into the single original value register OrigRegs[0] of type \p LLTy,
/// choosing merge/concat/build-vector/truncate based on how the value was
/// decomposed.
static void packSplitRegsToOrigType(MachineIRBuilder &MIRBuilder,
                                    ArrayRef<Register> OrigRegs,
                                    ArrayRef<Register> Regs,
                                    LLT LLTy,
                                    LLT PartLLT) {
  // Scalar split into smaller scalars: a plain merge rebuilds it.
  if (!LLTy.isVector() && !PartLLT.isVector()) {
    MIRBuilder.buildMerge(OrigRegs[0], Regs);
    return;
  }

  if (LLTy.isVector() && PartLLT.isVector()) {
    assert(LLTy.getElementType() == PartLLT.getElementType());

    int DstElts = LLTy.getNumElements();
    int PartElts = PartLLT.getNumElements();
    if (DstElts % PartElts == 0)
      MIRBuilder.buildConcatVectors(OrigRegs[0], Regs);
    else {
      // Deal with v3s16 split into v2s16
      assert(PartElts == 2 && DstElts % 2 != 0);
      // Concatenate into the next multiple of PartElts elements, then
      // extract the low DstElts-element vector.
      int RoundedElts = PartElts * ((DstElts + PartElts - 1) / PartElts);

      LLT RoundedDestTy = LLT::vector(RoundedElts, PartLLT.getElementType());
      auto RoundedConcat = MIRBuilder.buildConcatVectors(RoundedDestTy, Regs);
      MIRBuilder.buildExtract(OrigRegs[0], RoundedConcat, 0);
    }

    return;
  }

  // Remaining case: vector destination rebuilt from scalar parts.
  assert(LLTy.isVector() && !PartLLT.isVector());

  LLT DstEltTy = LLTy.getElementType();
  if (DstEltTy == PartLLT) {
    // Vector was trivially scalarized.
    MIRBuilder.buildBuildVector(OrigRegs[0], Regs);
  } else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) {
    // Deal with vector with 64-bit elements decomposed to 32-bit
    // registers. Need to create intermediate 64-bit elements.
    SmallVector<Register, 8> EltMerges;
    int PartsPerElt = DstEltTy.getSizeInBits() / PartLLT.getSizeInBits();

    assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0);

    // Merge PartsPerElt consecutive parts into each wide element.
    for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I)  {
      auto Merge = MIRBuilder.buildMerge(DstEltTy,
                                         Regs.take_front(PartsPerElt));
      EltMerges.push_back(Merge.getReg(0));
      Regs = Regs.drop_front(PartsPerElt);
    }

    MIRBuilder.buildBuildVector(OrigRegs[0], EltMerges);
  } else {
    // Vector was split, and elements promoted to a wider type.
    LLT BVType = LLT::vector(LLTy.getNumElements(), PartLLT);
    auto BV = MIRBuilder.buildBuildVector(BVType, Regs);
    MIRBuilder.buildTrunc(OrigRegs[0], BV);
  }
}
448fecf43ebSMatt Arsenault 
449b725d273SMatt Arsenault bool AMDGPUCallLowering::lowerFormalArguments(
450b725d273SMatt Arsenault     MachineIRBuilder &MIRBuilder, const Function &F,
451b725d273SMatt Arsenault     ArrayRef<ArrayRef<Register>> VRegs) const {
452fecf43ebSMatt Arsenault   CallingConv::ID CC = F.getCallingConv();
453fecf43ebSMatt Arsenault 
454b725d273SMatt Arsenault   // The infrastructure for normal calling convention lowering is essentially
455b725d273SMatt Arsenault   // useless for kernels. We want to avoid any kind of legalization or argument
456b725d273SMatt Arsenault   // splitting.
457fecf43ebSMatt Arsenault   if (CC == CallingConv::AMDGPU_KERNEL)
458b725d273SMatt Arsenault     return lowerFormalArgumentsKernel(MIRBuilder, F, VRegs);
459b725d273SMatt Arsenault 
460b725d273SMatt Arsenault   // AMDGPU_GS and AMDGP_HS are not supported yet.
461fecf43ebSMatt Arsenault   if (CC == CallingConv::AMDGPU_GS || CC == CallingConv::AMDGPU_HS)
462b725d273SMatt Arsenault     return false;
463b725d273SMatt Arsenault 
464fecf43ebSMatt Arsenault   const bool IsShader = AMDGPU::isShader(CC);
465fecf43ebSMatt Arsenault   const bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CC);
466fecf43ebSMatt Arsenault 
467b725d273SMatt Arsenault   MachineFunction &MF = MIRBuilder.getMF();
468fecf43ebSMatt Arsenault   MachineBasicBlock &MBB = MIRBuilder.getMBB();
469b725d273SMatt Arsenault   MachineRegisterInfo &MRI = MF.getRegInfo();
470b725d273SMatt Arsenault   SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
471fecf43ebSMatt Arsenault   const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
472fecf43ebSMatt Arsenault   const SIRegisterInfo *TRI = Subtarget.getRegisterInfo();
473b725d273SMatt Arsenault   const DataLayout &DL = F.getParent()->getDataLayout();
474b725d273SMatt Arsenault 
475b725d273SMatt Arsenault 
476b725d273SMatt Arsenault   SmallVector<CCValAssign, 16> ArgLocs;
477fecf43ebSMatt Arsenault   CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());
478b725d273SMatt Arsenault 
479bae3636fSMatt Arsenault   if (Info->hasImplicitBufferPtr()) {
480fecf43ebSMatt Arsenault     Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
481bae3636fSMatt Arsenault     MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
482bae3636fSMatt Arsenault     CCInfo.AllocateReg(ImplicitBufferPtrReg);
483bae3636fSMatt Arsenault   }
484bae3636fSMatt Arsenault 
485fecf43ebSMatt Arsenault 
486fecf43ebSMatt Arsenault   SmallVector<ArgInfo, 32> SplitArgs;
487fecf43ebSMatt Arsenault   unsigned Idx = 0;
488c7709e1cSTom Stellard   unsigned PSInputNum = 0;
4899d8337d8STom Stellard 
490fecf43ebSMatt Arsenault   for (auto &Arg : F.args()) {
491fecf43ebSMatt Arsenault     if (DL.getTypeStoreSize(Arg.getType()) == 0)
492c7709e1cSTom Stellard       continue;
493c7709e1cSTom Stellard 
494fecf43ebSMatt Arsenault     const bool InReg = Arg.hasAttribute(Attribute::InReg);
495fecf43ebSMatt Arsenault 
496fecf43ebSMatt Arsenault     // SGPR arguments to functions not implemented.
497fecf43ebSMatt Arsenault     if (!IsShader && InReg)
498fecf43ebSMatt Arsenault       return false;
499fecf43ebSMatt Arsenault 
500b60a2ae4SMatt Arsenault     // TODO: Handle sret.
501fecf43ebSMatt Arsenault     if (Arg.hasAttribute(Attribute::StructRet) ||
502fecf43ebSMatt Arsenault         Arg.hasAttribute(Attribute::SwiftSelf) ||
503fecf43ebSMatt Arsenault         Arg.hasAttribute(Attribute::SwiftError) ||
504b60a2ae4SMatt Arsenault         Arg.hasAttribute(Attribute::Nest))
505fecf43ebSMatt Arsenault       return false;
506fecf43ebSMatt Arsenault 
507fecf43ebSMatt Arsenault     if (CC == CallingConv::AMDGPU_PS && !InReg && PSInputNum <= 15) {
508fecf43ebSMatt Arsenault       const bool ArgUsed = !Arg.use_empty();
509fecf43ebSMatt Arsenault       bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum);
510fecf43ebSMatt Arsenault 
511fecf43ebSMatt Arsenault       if (!SkipArg) {
512c7709e1cSTom Stellard         Info->markPSInputAllocated(PSInputNum);
513fecf43ebSMatt Arsenault         if (ArgUsed)
514c7709e1cSTom Stellard           Info->markPSInputEnabled(PSInputNum);
515fecf43ebSMatt Arsenault       }
516c7709e1cSTom Stellard 
517c7709e1cSTom Stellard       ++PSInputNum;
518c7709e1cSTom Stellard 
519fecf43ebSMatt Arsenault       if (SkipArg) {
520b60a2ae4SMatt Arsenault         for (int I = 0, E = VRegs[Idx].size(); I != E; ++I)
521b60a2ae4SMatt Arsenault           MIRBuilder.buildUndef(VRegs[Idx][I]);
522b60a2ae4SMatt Arsenault 
523fecf43ebSMatt Arsenault         ++Idx;
524c7709e1cSTom Stellard         continue;
525fecf43ebSMatt Arsenault       }
5269d8337d8STom Stellard     }
527e0a4da8cSMatt Arsenault 
528fecf43ebSMatt Arsenault     ArgInfo OrigArg(VRegs[Idx], Arg.getType());
529fecf43ebSMatt Arsenault     setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F);
530b60a2ae4SMatt Arsenault 
531b60a2ae4SMatt Arsenault     splitToValueTypes(
532b60a2ae4SMatt Arsenault       OrigArg, SplitArgs, DL, MRI, CC,
533fecf43ebSMatt Arsenault       // FIXME: We should probably be passing multiple registers to
534fecf43ebSMatt Arsenault       // handleAssignments to do this
535b60a2ae4SMatt Arsenault       [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT, int VTSplitIdx) {
536b60a2ae4SMatt Arsenault         packSplitRegsToOrigType(MIRBuilder, VRegs[Idx][VTSplitIdx], Regs,
537b60a2ae4SMatt Arsenault                                 LLTy, PartLLT);
538fecf43ebSMatt Arsenault       });
539fecf43ebSMatt Arsenault 
540fecf43ebSMatt Arsenault     ++Idx;
5419d8337d8STom Stellard   }
5429d8337d8STom Stellard 
543fecf43ebSMatt Arsenault   // At least one interpolation mode must be enabled or else the GPU will
544fecf43ebSMatt Arsenault   // hang.
545fecf43ebSMatt Arsenault   //
546fecf43ebSMatt Arsenault   // Check PSInputAddr instead of PSInputEnable. The idea is that if the user
547fecf43ebSMatt Arsenault   // set PSInputAddr, the user wants to enable some bits after the compilation
548fecf43ebSMatt Arsenault   // based on run-time states. Since we can't know what the final PSInputEna
549fecf43ebSMatt Arsenault   // will look like, so we shouldn't do anything here and the user should take
550fecf43ebSMatt Arsenault   // responsibility for the correct programming.
551fecf43ebSMatt Arsenault   //
552fecf43ebSMatt Arsenault   // Otherwise, the following restrictions apply:
553fecf43ebSMatt Arsenault   // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
554fecf43ebSMatt Arsenault   // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
555fecf43ebSMatt Arsenault   //   enabled too.
556fecf43ebSMatt Arsenault   if (CC == CallingConv::AMDGPU_PS) {
557fecf43ebSMatt Arsenault     if ((Info->getPSInputAddr() & 0x7F) == 0 ||
558fecf43ebSMatt Arsenault         ((Info->getPSInputAddr() & 0xF) == 0 &&
559fecf43ebSMatt Arsenault          Info->isPSInputAllocated(11))) {
560fecf43ebSMatt Arsenault       CCInfo.AllocateReg(AMDGPU::VGPR0);
561fecf43ebSMatt Arsenault       CCInfo.AllocateReg(AMDGPU::VGPR1);
562fecf43ebSMatt Arsenault       Info->markPSInputAllocated(0);
563fecf43ebSMatt Arsenault       Info->markPSInputEnabled(0);
564fecf43ebSMatt Arsenault     }
565fecf43ebSMatt Arsenault 
566fecf43ebSMatt Arsenault     if (Subtarget.isAmdPalOS()) {
567fecf43ebSMatt Arsenault       // For isAmdPalOS, the user does not enable some bits after compilation
568fecf43ebSMatt Arsenault       // based on run-time states; the register values being generated here are
569fecf43ebSMatt Arsenault       // the final ones set in hardware. Therefore we need to apply the
570fecf43ebSMatt Arsenault       // workaround to PSInputAddr and PSInputEnable together.  (The case where
571fecf43ebSMatt Arsenault       // a bit is set in PSInputAddr but not PSInputEnable is where the frontend
572fecf43ebSMatt Arsenault       // set up an input arg for a particular interpolation mode, but nothing
573fecf43ebSMatt Arsenault       // uses that input arg. Really we should have an earlier pass that removes
574fecf43ebSMatt Arsenault       // such an arg.)
575fecf43ebSMatt Arsenault       unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
576fecf43ebSMatt Arsenault       if ((PsInputBits & 0x7F) == 0 ||
577fecf43ebSMatt Arsenault           ((PsInputBits & 0xF) == 0 &&
578fecf43ebSMatt Arsenault            (PsInputBits >> 11 & 1)))
579fecf43ebSMatt Arsenault         Info->markPSInputEnabled(
580fecf43ebSMatt Arsenault           countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined));
581fecf43ebSMatt Arsenault     }
582fecf43ebSMatt Arsenault   }
583fecf43ebSMatt Arsenault 
584fecf43ebSMatt Arsenault   const SITargetLowering &TLI = *getTLI<SITargetLowering>();
585fecf43ebSMatt Arsenault   CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC, F.isVarArg());
586fecf43ebSMatt Arsenault 
587fecf43ebSMatt Arsenault   if (!MBB.empty())
588fecf43ebSMatt Arsenault     MIRBuilder.setInstr(*MBB.begin());
589fecf43ebSMatt Arsenault 
590fecf43ebSMatt Arsenault   FormalArgHandler Handler(MIRBuilder, MRI, AssignFn);
591fecf43ebSMatt Arsenault   if (!handleAssignments(CCInfo, ArgLocs, MIRBuilder, SplitArgs, Handler))
59229f30379SMatt Arsenault     return false;
593fecf43ebSMatt Arsenault 
594fecf43ebSMatt Arsenault   if (!IsEntryFunc) {
595fecf43ebSMatt Arsenault     // Special inputs come after user arguments.
596fecf43ebSMatt Arsenault     TLI.allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info);
597fecf43ebSMatt Arsenault   }
598fecf43ebSMatt Arsenault 
599fecf43ebSMatt Arsenault   // Start adding system SGPRs.
600fecf43ebSMatt Arsenault   if (IsEntryFunc) {
601fecf43ebSMatt Arsenault     TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsShader);
602fecf43ebSMatt Arsenault   } else {
603fecf43ebSMatt Arsenault     CCInfo.AllocateReg(Info->getScratchRSrcReg());
604fecf43ebSMatt Arsenault     CCInfo.AllocateReg(Info->getScratchWaveOffsetReg());
605fecf43ebSMatt Arsenault     CCInfo.AllocateReg(Info->getFrameOffsetReg());
606fecf43ebSMatt Arsenault     TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
607fecf43ebSMatt Arsenault   }
608fecf43ebSMatt Arsenault 
609fecf43ebSMatt Arsenault   // Move back to the end of the basic block.
610fecf43ebSMatt Arsenault   MIRBuilder.setMBB(MBB);
611fecf43ebSMatt Arsenault 
612fecf43ebSMatt Arsenault   return true;
613000c5af3STom Stellard }
614