//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUCallLowering.h"
#include "AMDGPU.h"
#include "AMDGPUISelLowering.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/LowLevelTypeImpl.h"

using namespace llvm;

namespace {

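// Handler for outgoing values (currently only return values): extends each
// value to its assigned location type, copies it into the assigned physical
// register, and records that register as an implicit use on the return
// instruction being built.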
struct OutgoingValueHandler : public CallLowering::ValueHandler {
  OutgoingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                       MachineInstrBuilder MIB, CCAssignFn *AssignFn)
      : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}

  MachineInstrBuilder MIB;

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    Register ExtReg;
    if (VA.getLocVT().getSizeInBits() < 32) {
      // 16-bit types are reported as legal for 32-bit registers. We need to
      // extend and do a 32-bit copy to avoid the verifier complaining about it.
      ExtReg = MIRBuilder.buildAnyExt(LLT::scalar(32), ValVReg).getReg(0);
    } else
      ExtReg = extendRegister(ValVReg, VA);

    MIRBuilder.buildCopy(PhysReg, ExtReg);
    MIB.addUse(PhysReg, RegState::Implicit);
  }

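  // Delegate the location decision to the target's CCAssignFn; by LLVM
  // convention the assign function returns true when it fails to assign a
  // location for the value.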
  bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo,
                 const CallLowering::ArgInfo &Info,
                 CCState &State) override {
    return AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
  }
};

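// Handler for incoming values: materializes each argument from its assigned
// physical register or fixed stack slot into the corresponding virtual
// register, truncating or copying as the location info requires.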
struct IncomingArgHandler : public CallLowering::ValueHandler {
  uint64_t StackUsed = 0;

  IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                     CCAssignFn *AssignFn)
    : ValueHandler(MIRBuilder, MRI, AssignFn) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    auto &MFI = MIRBuilder.getMF().getFrameInfo();
    int FI = MFI.CreateFixedObject(Size, Offset, true);
    MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
    Register AddrReg = MRI.createGenericVirtualRegister(
      LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32));
    MIRBuilder.buildFrameIndex(AddrReg, FI);
    StackUsed = std::max(StackUsed, Size + Offset);
    return AddrReg;
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    markPhysRegUsed(PhysReg);

    if (VA.getLocVT().getSizeInBits() < 32) {
      // 16-bit types are reported as legal for 32-bit registers. We need to do
      // a 32-bit copy, and truncate to avoid the verifier complaining about it.
      auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);
      MIRBuilder.buildTrunc(ValVReg, Copy);
      return;
    }

    switch (VA.getLocInfo()) {
    case CCValAssign::LocInfo::SExt:
    case CCValAssign::LocInfo::ZExt:
    case CCValAssign::LocInfo::AExt: {
      auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
      MIRBuilder.buildTrunc(ValVReg, Copy);
      break;
    }
    default:
      MIRBuilder.buildCopy(ValVReg, PhysReg);
      break;
    }
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    // FIXME: Get alignment
    auto MMO = MIRBuilder.getMF().getMachineMemOperand(
      MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size, 1);
    MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
  }

  /// How the physical register gets marked varies between formal
  /// parameters (it's a basic-block live-in) and a call instruction
  /// (it's an implicit-def of the call).
  virtual void markPhysRegUsed(unsigned PhysReg) = 0;

  // FIXME: What is the point of this being a callback?
  bool isIncomingArgumentHandler() const override { return true; }
};

struct FormalArgHandler : public IncomingArgHandler {
  FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                   CCAssignFn *AssignFn)
    : IncomingArgHandler(MIRBuilder, MRI, AssignFn) {}

  void markPhysRegUsed(unsigned PhysReg) override {
    MIRBuilder.getMBB().addLiveIn(PhysReg);
  }
};

} // end anonymous namespace

AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
  : CallLowering(&TLI) {
}

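/// Break \p OrigArg into one ArgInfo per legal part register. Each value type
/// produced by ComputeValueVTs is expanded to the number of registers the
/// calling convention uses for it, and \p PerformArgSplit is called so the
/// caller can emit the instructions that repack the part registers into the
/// original value.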
void AMDGPUCallLowering::splitToValueTypes(
    const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
    const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv,
    SplitArgTy PerformArgSplit) const {
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  LLVMContext &Ctx = OrigArg.Ty->getContext();

  if (OrigArg.Ty->isVoidTy())
    return;

  SmallVector<EVT, 4> SplitVTs;
  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs);

  assert(OrigArg.Regs.size() == SplitVTs.size());

  int SplitIdx = 0;
  for (EVT VT : SplitVTs) {
    unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT);
    Type *Ty = VT.getTypeForEVT(Ctx);

    if (NumParts == 1) {
      // No splitting to do, but we want to replace the original type (e.g. [1 x
      // double] -> double).
      SplitArgs.emplace_back(OrigArg.Regs[SplitIdx], Ty,
                             OrigArg.Flags, OrigArg.IsFixed);

      ++SplitIdx;
      continue;
    }

    LLT LLTy = getLLTForType(*Ty, DL);

    SmallVector<Register, 8> SplitRegs;

    EVT PartVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);
    Type *PartTy = PartVT.getTypeForEVT(Ctx);
    LLT PartLLT = getLLTForType(*PartTy, DL);

    // FIXME: Should we be reporting all of the part registers for a single
    // argument, and let handleAssignments take care of the repacking?
    for (unsigned i = 0; i < NumParts; ++i) {
      Register PartReg = MRI.createGenericVirtualRegister(PartLLT);
      SplitRegs.push_back(PartReg);
      SplitArgs.emplace_back(ArrayRef<Register>(PartReg), PartTy, OrigArg.Flags);
    }

    PerformArgSplit(SplitRegs, LLTy, PartLLT, SplitIdx);

    ++SplitIdx;
  }
}

// Get the appropriate type to make \p OrigTy \p Factor times bigger.
static LLT getMultipleType(LLT OrigTy, int Factor) {
  if (OrigTy.isVector()) {
    return LLT::vector(OrigTy.getNumElements() * Factor,
                       OrigTy.getElementType());
  }

  return LLT::scalar(OrigTy.getSizeInBits() * Factor);
}

// TODO: Move to generic code
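// Repack the value in \p SrcReg (of type \p SrcTy) into the part registers
// \p DstRegs (each of type \p PartTy): either by unmerging directly, by
// unmerging to elements and any-extending, or, when the source size is not a
// multiple of the part size, by padding the source with undef to the next
// multiple and extracting each part from the widened value.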
static void unpackRegsToOrigType(MachineIRBuilder &MIRBuilder,
                                 ArrayRef<Register> DstRegs,
                                 Register SrcReg,
                                 LLT SrcTy,
                                 LLT PartTy) {
  assert(DstRegs.size() > 1 && "Nothing to unpack");

  MachineFunction &MF = MIRBuilder.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned PartSize = PartTy.getSizeInBits();

  if (SrcTy.isVector() && !PartTy.isVector() &&
      PartSize > SrcTy.getElementType().getSizeInBits()) {
    // Vector was scalarized, and the elements extended.
    auto UnmergeToEltTy = MIRBuilder.buildUnmerge(SrcTy.getElementType(),
                                                  SrcReg);
    for (int i = 0, e = DstRegs.size(); i != e; ++i)
      MIRBuilder.buildAnyExt(DstRegs[i], UnmergeToEltTy.getReg(i));
    return;
  }

  if (SrcSize % PartSize == 0) {
    MIRBuilder.buildUnmerge(DstRegs, SrcReg);
    return;
  }

  const int NumRoundedParts = (SrcSize + PartSize - 1) / PartSize;

  LLT BigTy = getMultipleType(PartTy, NumRoundedParts);
  auto ImpDef = MIRBuilder.buildUndef(BigTy);

  Register BigReg = MRI.createGenericVirtualRegister(BigTy);
  MIRBuilder.buildInsert(BigReg, ImpDef.getReg(0), SrcReg, 0);

  int64_t Offset = 0;
  for (unsigned i = 0, e = DstRegs.size(); i != e; ++i, Offset += PartSize)
    MIRBuilder.buildExtract(DstRegs[i], BigReg, Offset);
}

/// Lower the return value for the already existing \p Ret. This assumes that
/// \p MIRBuilder's insertion point is correct.
bool AMDGPUCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
                                        const Value *Val, ArrayRef<Register> VRegs,
                                        MachineInstrBuilder &Ret) const {
  if (!Val)
    return true;

  auto &MF = MIRBuilder.getMF();
  const auto &F = MF.getFunction();
  const DataLayout &DL = MF.getDataLayout();

  CallingConv::ID CC = F.getCallingConv();
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  ArgInfo OrigRetInfo(VRegs, Val->getType());
  setArgFlags(OrigRetInfo, AttributeList::ReturnIndex, DL, F);
  SmallVector<ArgInfo, 4> SplitRetInfos;

  splitToValueTypes(
    OrigRetInfo, SplitRetInfos, DL, MRI, CC,
    [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT, int VTSplitIdx) {
      unpackRegsToOrigType(MIRBuilder, Regs, VRegs[VTSplitIdx], LLTy, PartLLT);
    });

  CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg());

  OutgoingValueHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret, AssignFn);
  return handleAssignments(MIRBuilder, SplitRetInfos, RetHandler);
}

bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                     const Value *Val,
                                     ArrayRef<Register> VRegs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MFI->setIfReturnsVoid(!Val);

  assert(!Val == VRegs.empty() && "Return value without a vreg");

  CallingConv::ID CC = MIRBuilder.getMF().getFunction().getCallingConv();
  const bool IsShader = AMDGPU::isShader(CC);
  const bool IsWaveEnd = (IsShader && MFI->returnsVoid()) ||
                         AMDGPU::isKernel(CC);
  if (IsWaveEnd) {
    MIRBuilder.buildInstr(AMDGPU::S_ENDPGM)
      .addImm(0);
    return true;
  }

  const auto &ST = MIRBuilder.getMF().getSubtarget<GCNSubtarget>();

  unsigned ReturnOpc =
      IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::S_SETPC_B64_return;

  auto Ret = MIRBuilder.buildInstrNoInsert(ReturnOpc);
  Register ReturnAddrVReg;
  if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
    ReturnAddrVReg = MRI.createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass);
    Ret.addUse(ReturnAddrVReg);
  }

  if (!lowerReturnVal(MIRBuilder, Val, VRegs, Ret))
    return false;

  if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
    const SIRegisterInfo *TRI = ST.getRegisterInfo();
    Register LiveInReturn = MF.addLiveIn(TRI->getReturnAddressReg(MF),
                                         &AMDGPU::SGPR_64RegClass);
    MIRBuilder.buildCopy(ReturnAddrVReg, LiveInReturn);
  }

  // TODO: Handle CalleeSavedRegsViaCopy.

  MIRBuilder.insertInstr(Ret);
  return true;
}

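/// Compute a pointer into the kernarg segment: GEP the preloaded kernarg
/// segment pointer by \p Offset bytes and return the result as a
/// constant-address-space pointer to \p ParamTy.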
Register AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
                                               Type *ParamTy,
                                               uint64_t Offset) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  LLT PtrType = getLLTForType(*PtrTy, DL);
  Register DstReg = MRI.createGenericVirtualRegister(PtrType);
  Register KernArgSegmentPtr =
    MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);

  Register OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
  MIRBuilder.buildConstant(OffsetReg, Offset);

  MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);

  return DstReg;
}

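/// Load a kernel argument of type \p ParamTy from byte offset \p Offset in
/// the kernarg segment into \p DstReg. The load is marked dereferenceable and
/// invariant since the kernarg segment is read-only for a dispatch.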
void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
                                        Type *ParamTy, uint64_t Offset,
                                        unsigned Align,
                                        Register DstReg) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
  Register PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);

  MachineMemOperand *MMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
                                       MachineMemOperand::MODereferenceable |
                                       MachineMemOperand::MOInvariant,
                                       TypeSize, Align);

  MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
}

// Allocate special inputs passed in user SGPRs.
static void allocateHSAUserSGPRs(CCState &CCInfo,
                                 MachineIRBuilder &MIRBuilder,
                                 MachineFunction &MF,
                                 const SIRegisterInfo &TRI,
                                 SIMachineFunctionInfo &Info) {
  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
  if (Info.hasPrivateSegmentBuffer()) {
    unsigned PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
    CCInfo.AllocateReg(PrivateSegmentBufferReg);
  }

  if (Info.hasDispatchPtr()) {
    unsigned DispatchPtrReg = Info.addDispatchPtr(TRI);
    MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchPtrReg);
  }

  if (Info.hasQueuePtr()) {
    unsigned QueuePtrReg = Info.addQueuePtr(TRI);
    MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(QueuePtrReg);
  }

  if (Info.hasKernargSegmentPtr()) {
    MachineRegisterInfo &MRI = MF.getRegInfo();
    Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
    const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
    Register VReg = MRI.createGenericVirtualRegister(P4);
    MRI.addLiveIn(InputPtrReg, VReg);
    MIRBuilder.getMBB().addLiveIn(InputPtrReg);
    MIRBuilder.buildCopy(VReg, InputPtrReg);
    CCInfo.AllocateReg(InputPtrReg);
  }

  if (Info.hasDispatchID()) {
    unsigned DispatchIDReg = Info.addDispatchID(TRI);
    MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchIDReg);
  }

  if (Info.hasFlatScratchInit()) {
    unsigned FlatScratchInitReg = Info.addFlatScratchInit(TRI);
    MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(FlatScratchInitReg);
  }

  // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
  // these from the dispatch pointer.
}

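// Lower the incoming arguments of an amdgpu_kernel function. Kernel arguments
// are not passed in registers: each one is loaded from the kernarg segment at
// an offset derived from the ABI alignment and allocation size of its type.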
bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
    MachineIRBuilder &MIRBuilder, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();

  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

  allocateHSAUserSGPRs(CCInfo, MIRBuilder, MF, *TRI, *Info);

  unsigned i = 0;
  const unsigned KernArgBaseAlign = 16;
  const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
  uint64_t ExplicitArgOffset = 0;

  // TODO: Align down to dword alignment and extract bits for extending loads.
  for (auto &Arg : F.args()) {
    Type *ArgTy = Arg.getType();
    unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
    if (AllocSize == 0)
      continue;

    unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);

    uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
    ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;

    ArrayRef<Register> OrigArgRegs = VRegs[i];
    Register ArgReg =
      OrigArgRegs.size() == 1
      ? OrigArgRegs[0]
      : MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL));
    unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
    ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
    lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, ArgReg);
    if (OrigArgRegs.size() > 1)
      unpackRegs(OrigArgRegs, ArgReg, ArgTy, MIRBuilder);
    ++i;
  }

  TLI.allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
  TLI.allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
  return true;
}

// TODO: Move this to generic code
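// Rebuild the original value in OrigRegs[0] from the split part registers in
// Regs. Handles the scalar case (merge), vector-to-vector splits (concat,
// padding odd element counts such as v3s16 split into v2s16), scalarized
// vectors (build_vector), and vectors whose elements were decomposed or
// promoted to a different scalar size.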
static void packSplitRegsToOrigType(MachineIRBuilder &MIRBuilder,
                                    ArrayRef<Register> OrigRegs,
                                    ArrayRef<Register> Regs,
                                    LLT LLTy,
                                    LLT PartLLT) {
  if (!LLTy.isVector() && !PartLLT.isVector()) {
    MIRBuilder.buildMerge(OrigRegs[0], Regs);
    return;
  }

  if (LLTy.isVector() && PartLLT.isVector()) {
    assert(LLTy.getElementType() == PartLLT.getElementType());

    int DstElts = LLTy.getNumElements();
    int PartElts = PartLLT.getNumElements();
    if (DstElts % PartElts == 0)
      MIRBuilder.buildConcatVectors(OrigRegs[0], Regs);
    else {
      // Deal with v3s16 split into v2s16
      assert(PartElts == 2 && DstElts % 2 != 0);
      int RoundedElts = PartElts * ((DstElts + PartElts - 1) / PartElts);

      LLT RoundedDestTy = LLT::vector(RoundedElts, PartLLT.getElementType());
      auto RoundedConcat = MIRBuilder.buildConcatVectors(RoundedDestTy, Regs);
      MIRBuilder.buildExtract(OrigRegs[0], RoundedConcat, 0);
    }

    return;
  }

  assert(LLTy.isVector() && !PartLLT.isVector());

  LLT DstEltTy = LLTy.getElementType();
  if (DstEltTy == PartLLT) {
    // Vector was trivially scalarized.
    MIRBuilder.buildBuildVector(OrigRegs[0], Regs);
  } else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) {
    // Deal with vector with 64-bit elements decomposed to 32-bit
    // registers. Need to create intermediate 64-bit elements.
    SmallVector<Register, 8> EltMerges;
    int PartsPerElt = DstEltTy.getSizeInBits() / PartLLT.getSizeInBits();

    assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0);

    for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) {
      auto Merge = MIRBuilder.buildMerge(DstEltTy,
                                         Regs.take_front(PartsPerElt));
      EltMerges.push_back(Merge.getReg(0));
      Regs = Regs.drop_front(PartsPerElt);
    }

    MIRBuilder.buildBuildVector(OrigRegs[0], EltMerges);
  } else {
    // Vector was split, and elements promoted to a wider type.
    LLT BVType = LLT::vector(LLTy.getNumElements(), PartLLT);
    auto BV = MIRBuilder.buildBuildVector(BVType, Regs);
    MIRBuilder.buildTrunc(OrigRegs[0], BV);
  }
}

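// Lower formal arguments for shaders and callable functions. Kernels take the
// separate path above since their arguments bypass register assignment
// entirely.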
bool AMDGPUCallLowering::lowerFormalArguments(
    MachineIRBuilder &MIRBuilder, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  CallingConv::ID CC = F.getCallingConv();

  // The infrastructure for normal calling convention lowering is essentially
  // useless for kernels. We want to avoid any kind of legalization or argument
  // splitting.
  if (CC == CallingConv::AMDGPU_KERNEL)
    return lowerFormalArgumentsKernel(MIRBuilder, F, VRegs);

  const bool IsShader = AMDGPU::isShader(CC);
  const bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CC);

  MachineFunction &MF = MIRBuilder.getMF();
  MachineBasicBlock &MBB = MIRBuilder.getMBB();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());

  if (!IsEntryFunc) {
    Register ReturnAddrReg = TRI->getReturnAddressReg(MF);
    Register LiveInReturn = MF.addLiveIn(ReturnAddrReg,
                                         &AMDGPU::SGPR_64RegClass);
    MBB.addLiveIn(ReturnAddrReg);
    MIRBuilder.buildCopy(LiveInReturn, ReturnAddrReg);
  }

  if (Info->hasImplicitBufferPtr()) {
    Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
    MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(ImplicitBufferPtrReg);
  }

  SmallVector<ArgInfo, 32> SplitArgs;
  unsigned Idx = 0;
  unsigned PSInputNum = 0;

  for (auto &Arg : F.args()) {
    if (DL.getTypeStoreSize(Arg.getType()) == 0)
      continue;

    const bool InReg = Arg.hasAttribute(Attribute::InReg);

    // SGPR arguments to functions are not yet implemented.
    if (!IsShader && InReg)
      return false;

    if (Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::Nest))
      return false;

    if (CC == CallingConv::AMDGPU_PS && !InReg && PSInputNum <= 15) {
      const bool ArgUsed = !Arg.use_empty();
      bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum);

      if (!SkipArg) {
        Info->markPSInputAllocated(PSInputNum);
        if (ArgUsed)
          Info->markPSInputEnabled(PSInputNum);
      }

      ++PSInputNum;

      if (SkipArg) {
        for (int I = 0, E = VRegs[Idx].size(); I != E; ++I)
          MIRBuilder.buildUndef(VRegs[Idx][I]);

        ++Idx;
        continue;
      }
    }

    ArgInfo OrigArg(VRegs[Idx], Arg.getType());
    setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F);

    splitToValueTypes(
      OrigArg, SplitArgs, DL, MRI, CC,
      // FIXME: We should probably be passing multiple registers to
      // handleAssignments to do this
      [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT, int VTSplitIdx) {
        packSplitRegsToOrigType(MIRBuilder, VRegs[Idx][VTSplitIdx], Regs,
                                LLTy, PartLLT);
      });

    ++Idx;
  }

  // At least one interpolation mode must be enabled or else the GPU will
  // hang.
  //
  // Check PSInputAddr instead of PSInputEnable. The idea is that if the user
  // set PSInputAddr, the user wants to enable some bits after the compilation
  // based on run-time states. Since we can't know what the final PSInputEna
  // will look like, we shouldn't do anything here and the user should take
  // responsibility for the correct programming.
  //
  // Otherwise, the following restrictions apply:
  // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
  // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
  //   enabled too.
  if (CC == CallingConv::AMDGPU_PS) {
    if ((Info->getPSInputAddr() & 0x7F) == 0 ||
        ((Info->getPSInputAddr() & 0xF) == 0 &&
         Info->isPSInputAllocated(11))) {
      CCInfo.AllocateReg(AMDGPU::VGPR0);
      CCInfo.AllocateReg(AMDGPU::VGPR1);
      Info->markPSInputAllocated(0);
      Info->markPSInputEnabled(0);
    }

    if (Subtarget.isAmdPalOS()) {
      // For isAmdPalOS, the user does not enable some bits after compilation
      // based on run-time states; the register values being generated here are
      // the final ones set in hardware. Therefore we need to apply the
      // workaround to PSInputAddr and PSInputEnable together. (The case where
      // a bit is set in PSInputAddr but not PSInputEnable is where the frontend
      // set up an input arg for a particular interpolation mode, but nothing
      // uses that input arg. Really we should have an earlier pass that removes
      // such an arg.)
      unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
      if ((PsInputBits & 0x7F) == 0 ||
          ((PsInputBits & 0xF) == 0 &&
           (PsInputBits >> 11 & 1)))
        Info->markPSInputEnabled(
          countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined));
    }
  }

  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC, F.isVarArg());

  if (!MBB.empty())
    MIRBuilder.setInstr(*MBB.begin());

  FormalArgHandler Handler(MIRBuilder, MRI, AssignFn);
  if (!handleAssignments(CCInfo, ArgLocs, MIRBuilder, SplitArgs, Handler))
    return false;

  if (!IsEntryFunc) {
    // Special inputs come after user arguments.
    TLI.allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info);
  }

  // Start adding system SGPRs.
  if (IsEntryFunc) {
    TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsShader);
  } else {
    CCInfo.AllocateReg(Info->getScratchRSrcReg());
    CCInfo.AllocateReg(Info->getScratchWaveOffsetReg());
    CCInfo.AllocateReg(Info->getFrameOffsetReg());
    TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
  }

  // Move back to the end of the basic block.
  MIRBuilder.setMBB(MBB);

  return true;
}
714