//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUCallLowering.h"
#include "AMDGPU.h"
#include "AMDGPUISelLowering.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/LowLevelTypeImpl.h"

using namespace llvm;

namespace {

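// Value handler for outgoing values, i.e. return values. Outgoing values are
// extended to at least 32 bits, copied into their assigned physical
// registers, and those registers are added as implicit uses of the return
// instruction being built. Stack-located returns are not implemented.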
struct OutgoingValueHandler : public CallLowering::ValueHandler {
  OutgoingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                       MachineInstrBuilder MIB, CCAssignFn *AssignFn)
      : ValueHandler(B, MRI, AssignFn), MIB(MIB) {}

  MachineInstrBuilder MIB;

  bool isIncomingArgumentHandler() const override { return false; }

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    Register ExtReg;
    if (VA.getLocVT().getSizeInBits() < 32) {
      // 16-bit types are reported as legal for 32-bit registers. We need to
      // extend and do a 32-bit copy to avoid the verifier complaining about it.
      ExtReg = MIRBuilder.buildAnyExt(LLT::scalar(32), ValVReg).getReg(0);
    } else
      ExtReg = extendRegister(ValVReg, VA);

    // If this is a scalar return, insert a readfirstlane just in case the
    // value ends up in a VGPR.
    // FIXME: Assert this is a shader return.
    const SIRegisterInfo *TRI =
        static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
    if (TRI->isSGPRReg(MRI, PhysReg)) {
      auto ToSGPR = MIRBuilder.buildIntrinsic(Intrinsic::amdgcn_readfirstlane,
                                              {MRI.getType(ExtReg)}, false)
                        .addReg(ExtReg);
      ExtReg = ToSGPR.getReg(0);
    }

    MIRBuilder.buildCopy(PhysReg, ExtReg);
    MIB.addUse(PhysReg, RegState::Implicit);
  }

  bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo,
                 const CallLowering::ArgInfo &Info,
                 ISD::ArgFlagsTy Flags,
                 CCState &State) override {
    return AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
  }
};

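// Value handler for incoming values. Incoming values may arrive either in
// registers or at stack locations; StackUsed tracks the high-water mark of
// the stack space touched.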
struct IncomingArgHandler : public CallLowering::ValueHandler {
  uint64_t StackUsed = 0;

  IncomingArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                     CCAssignFn *AssignFn)
    : ValueHandler(B, MRI, AssignFn) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    auto &MFI = MIRBuilder.getMF().getFrameInfo();
    int FI = MFI.CreateFixedObject(Size, Offset, true);
    MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
    auto AddrReg = MIRBuilder.buildFrameIndex(
        LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32), FI);
    StackUsed = std::max(StackUsed, Size + Offset);
    return AddrReg.getReg(0);
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    markPhysRegUsed(PhysReg);

    if (VA.getLocVT().getSizeInBits() < 32) {
      // 16-bit types are reported as legal for 32-bit registers. We need to do
      // a 32-bit copy, and truncate to avoid the verifier complaining about it.
      auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);
      MIRBuilder.buildTrunc(ValVReg, Copy);
      return;
    }

    switch (VA.getLocInfo()) {
    case CCValAssign::LocInfo::SExt:
    case CCValAssign::LocInfo::ZExt:
    case CCValAssign::LocInfo::AExt: {
      auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
      MIRBuilder.buildTrunc(ValVReg, Copy);
      break;
    }
    default:
      MIRBuilder.buildCopy(ValVReg, PhysReg);
      break;
    }
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    MachineFunction &MF = MIRBuilder.getMF();
    unsigned Align = inferAlignmentFromPtrInfo(MF, MPO);

    auto MMO = MF.getMachineMemOperand(
        MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size,
        Align);
    MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
  }

  /// How the physical register gets marked varies between formal
  /// parameters (it's a basic-block live-in) and a call (it's an
  /// implicit-def of the call instruction).
  virtual void markPhysRegUsed(unsigned PhysReg) = 0;

  // FIXME: What is the point of this being a callback?
  bool isIncomingArgumentHandler() const override { return true; }
};

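// Incoming argument handler for formal arguments: assigned physical registers
// are recorded as live-ins of the current (entry) basic block.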
struct FormalArgHandler : public IncomingArgHandler {
  FormalArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                   CCAssignFn *AssignFn)
    : IncomingArgHandler(B, MRI, AssignFn) {}

  void markPhysRegUsed(unsigned PhysReg) override {
    MIRBuilder.getMBB().addLiveIn(PhysReg);
  }
};

} // end anonymous namespace

AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
  : CallLowering(&TLI) {
}

// FIXME: Compatibility shim
static ISD::NodeType extOpcodeToISDExtOpcode(unsigned MIOpc) {
  switch (MIOpc) {
  case TargetOpcode::G_SEXT:
    return ISD::SIGN_EXTEND;
  case TargetOpcode::G_ZEXT:
    return ISD::ZERO_EXTEND;
  case TargetOpcode::G_ANYEXT:
    return ISD::ANY_EXTEND;
  default:
    llvm_unreachable("not an extend opcode");
  }
}

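/// Break \p OrigArg into one ArgInfo per legal register-sized piece, as
/// determined by ComputeValueVTs and the calling convention. For return
/// values, this also applies any sign/zero extension the ABI requires.
/// \p PerformArgSplit is invoked for each value that was split into multiple
/// part registers, so the caller can pack or unpack the pieces.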
void AMDGPUCallLowering::splitToValueTypes(
  MachineIRBuilder &B,
  const ArgInfo &OrigArg, unsigned OrigArgIdx,
  SmallVectorImpl<ArgInfo> &SplitArgs,
  const DataLayout &DL, CallingConv::ID CallConv,
  SplitArgTy PerformArgSplit) const {
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  LLVMContext &Ctx = OrigArg.Ty->getContext();

  if (OrigArg.Ty->isVoidTy())
    return;

  SmallVector<EVT, 4> SplitVTs;
  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs);

  assert(OrigArg.Regs.size() == SplitVTs.size());

  int SplitIdx = 0;
  for (EVT VT : SplitVTs) {
    Register Reg = OrigArg.Regs[SplitIdx];
    Type *Ty = VT.getTypeForEVT(Ctx);
    LLT LLTy = getLLTForType(*Ty, DL);

    if (OrigArgIdx == AttributeList::ReturnIndex && VT.isScalarInteger()) {
      unsigned ExtendOp = TargetOpcode::G_ANYEXT;
      if (OrigArg.Flags[0].isSExt()) {
        assert(OrigArg.Regs.size() == 1 && "expect only simple return values");
        ExtendOp = TargetOpcode::G_SEXT;
      } else if (OrigArg.Flags[0].isZExt()) {
        assert(OrigArg.Regs.size() == 1 && "expect only simple return values");
        ExtendOp = TargetOpcode::G_ZEXT;
      }

      EVT ExtVT = TLI.getTypeForExtReturn(Ctx, VT,
                                          extOpcodeToISDExtOpcode(ExtendOp));
      if (ExtVT != VT) {
        VT = ExtVT;
        Ty = ExtVT.getTypeForEVT(Ctx);
        LLTy = getLLTForType(*Ty, DL);
        Reg = B.buildInstr(ExtendOp, {LLTy}, {Reg}).getReg(0);
      }
    }

    unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT);
    MVT RegVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);

    if (NumParts == 1) {
      // Fixup EVTs to an MVT.
      //
      // FIXME: This is pretty hacky. Why do we have to split the type
      // legalization logic between here and handleAssignments?
      if (OrigArgIdx != AttributeList::ReturnIndex && VT != RegVT) {
        assert(VT.getSizeInBits() < 32 && "unexpected illegal type");
        Ty = Type::getInt32Ty(Ctx);
        Register OrigReg = Reg;
        Reg = B.getMRI()->createGenericVirtualRegister(LLT::scalar(32));
        B.buildTrunc(OrigReg, Reg);
      }

      // No splitting to do, but we want to replace the original type (e.g. [1 x
      // double] -> double).
      SplitArgs.emplace_back(Reg, Ty, OrigArg.Flags, OrigArg.IsFixed);

      ++SplitIdx;
      continue;
    }

    SmallVector<Register, 8> SplitRegs;
    Type *PartTy = EVT(RegVT).getTypeForEVT(Ctx);
    LLT PartLLT = getLLTForType(*PartTy, DL);
    MachineRegisterInfo &MRI = *B.getMRI();

    // FIXME: Should we be reporting all of the part registers for a single
    // argument, and let handleAssignments take care of the repacking?
    for (unsigned i = 0; i < NumParts; ++i) {
      Register PartReg = MRI.createGenericVirtualRegister(PartLLT);
      SplitRegs.push_back(PartReg);
      SplitArgs.emplace_back(ArrayRef<Register>(PartReg), PartTy, OrigArg.Flags);
    }

    PerformArgSplit(SplitRegs, Reg, LLTy, PartLLT, SplitIdx);

    ++SplitIdx;
  }
}

// Get the appropriate type to make \p OrigTy \p Factor times bigger.
static LLT getMultipleType(LLT OrigTy, int Factor) {
  if (OrigTy.isVector()) {
    return LLT::vector(OrigTy.getNumElements() * Factor,
                       OrigTy.getElementType());
  }

  return LLT::scalar(OrigTy.getSizeInBits() * Factor);
}

// TODO: Move to generic code
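// Split the single source value \p SrcReg into the part registers
// \p DstRegs. Handles scalarized-and-extended vectors, the evenly divisible
// case via G_UNMERGE_VALUES, and the remainder case, where the source is
// first padded out to a multiple of the part size.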
static void unpackRegsToOrigType(MachineIRBuilder &B,
                                 ArrayRef<Register> DstRegs,
                                 Register SrcReg,
                                 const CallLowering::ArgInfo &Info,
                                 LLT SrcTy,
                                 LLT PartTy) {
  assert(DstRegs.size() > 1 && "Nothing to unpack");

  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned PartSize = PartTy.getSizeInBits();

  if (SrcTy.isVector() && !PartTy.isVector() &&
      PartSize > SrcTy.getElementType().getSizeInBits()) {
    // Vector was scalarized, and the elements extended.
    auto UnmergeToEltTy = B.buildUnmerge(SrcTy.getElementType(), SrcReg);
    for (int i = 0, e = DstRegs.size(); i != e; ++i)
      B.buildAnyExt(DstRegs[i], UnmergeToEltTy.getReg(i));
    return;
  }

  if (SrcSize % PartSize == 0) {
    B.buildUnmerge(DstRegs, SrcReg);
    return;
  }

  const int NumRoundedParts = (SrcSize + PartSize - 1) / PartSize;

  LLT BigTy = getMultipleType(PartTy, NumRoundedParts);
  auto ImpDef = B.buildUndef(BigTy);

  auto Big = B.buildInsert(BigTy, ImpDef.getReg(0), SrcReg, 0).getReg(0);

  int64_t Offset = 0;
  for (unsigned i = 0, e = DstRegs.size(); i != e; ++i, Offset += PartSize)
    B.buildExtract(DstRegs[i], Big, Offset);
}

/// Lower the return value for the already existing \p Ret. This assumes that
/// \p B's insertion point is correct.
bool AMDGPUCallLowering::lowerReturnVal(MachineIRBuilder &B,
                                        const Value *Val, ArrayRef<Register> VRegs,
                                        MachineInstrBuilder &Ret) const {
  if (!Val)
    return true;

  auto &MF = B.getMF();
  const auto &F = MF.getFunction();
  const DataLayout &DL = MF.getDataLayout();
  MachineRegisterInfo *MRI = B.getMRI();

  CallingConv::ID CC = F.getCallingConv();
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();

  ArgInfo OrigRetInfo(VRegs, Val->getType());
  setArgFlags(OrigRetInfo, AttributeList::ReturnIndex, DL, F);
  SmallVector<ArgInfo, 4> SplitRetInfos;

  splitToValueTypes(
    B, OrigRetInfo, AttributeList::ReturnIndex, SplitRetInfos, DL, CC,
    [&](ArrayRef<Register> Regs, Register SrcReg, LLT LLTy, LLT PartLLT,
        int VTSplitIdx) {
      unpackRegsToOrigType(B, Regs, SrcReg, SplitRetInfos[VTSplitIdx],
                           LLTy, PartLLT);
    });

  CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg());
  OutgoingValueHandler RetHandler(B, *MRI, Ret, AssignFn);
  return handleAssignments(B, SplitRetInfos, RetHandler);
}

bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B,
                                     const Value *Val,
                                     ArrayRef<Register> VRegs) const {
  MachineFunction &MF = B.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MFI->setIfReturnsVoid(!Val);

  assert(!Val == VRegs.empty() && "Return value without a vreg");

  CallingConv::ID CC = B.getMF().getFunction().getCallingConv();
  const bool IsShader = AMDGPU::isShader(CC);
  const bool IsWaveEnd = (IsShader && MFI->returnsVoid()) ||
                         AMDGPU::isKernel(CC);
  if (IsWaveEnd) {
    B.buildInstr(AMDGPU::S_ENDPGM)
      .addImm(0);
    return true;
  }

  const auto &ST = MF.getSubtarget<GCNSubtarget>();

  unsigned ReturnOpc =
      IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::S_SETPC_B64_return;

  auto Ret = B.buildInstrNoInsert(ReturnOpc);
  Register ReturnAddrVReg;
  if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
    ReturnAddrVReg = MRI.createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass);
    Ret.addUse(ReturnAddrVReg);
  }

  if (!lowerReturnVal(B, Val, VRegs, Ret))
    return false;

  if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
    const SIRegisterInfo *TRI = ST.getRegisterInfo();
    Register LiveInReturn = MF.addLiveIn(TRI->getReturnAddressReg(MF),
                                         &AMDGPU::SGPR_64RegClass);
    B.buildCopy(ReturnAddrVReg, LiveInReturn);
  }

  // TODO: Handle CalleeSavedRegsViaCopy.

  B.insertInstr(Ret);
  return true;
}

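/// Compute the address of the kernel argument at byte offset \p Offset from
/// the start of the kernarg segment, as a G_PTR_ADD off the preloaded kernarg
/// segment pointer.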
Register AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &B,
                                               Type *ParamTy,
                                               uint64_t Offset) const {
  MachineFunction &MF = B.getMF();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  LLT PtrType = getLLTForType(*PtrTy, DL);
  Register KernArgSegmentPtr =
    MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);

  auto OffsetReg = B.buildConstant(LLT::scalar(64), Offset);

  return B.buildPtrAdd(PtrType, KernArgSegmentVReg, OffsetReg).getReg(0);
}

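/// Load the kernel argument of type \p ParamTy at byte offset \p Offset from
/// the kernarg segment into \p DstReg, using an invariant, dereferenceable
/// load.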
void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &B,
                                        Type *ParamTy, uint64_t Offset,
                                        unsigned Align,
                                        Register DstReg) const {
  MachineFunction &MF = B.getMF();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
  Register PtrReg = lowerParameterPtr(B, ParamTy, Offset);

  MachineMemOperand *MMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
                                       MachineMemOperand::MODereferenceable |
                                       MachineMemOperand::MOInvariant,
                                       TypeSize, Align);

  B.buildLoad(DstReg, PtrReg, *MMO);
}

// Allocate special inputs passed in user SGPRs.
static void allocateHSAUserSGPRs(CCState &CCInfo,
                                 MachineIRBuilder &B,
                                 MachineFunction &MF,
                                 const SIRegisterInfo &TRI,
                                 SIMachineFunctionInfo &Info) {
  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
  if (Info.hasPrivateSegmentBuffer()) {
    unsigned PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
    CCInfo.AllocateReg(PrivateSegmentBufferReg);
  }

  if (Info.hasDispatchPtr()) {
    unsigned DispatchPtrReg = Info.addDispatchPtr(TRI);
    MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchPtrReg);
  }

  if (Info.hasQueuePtr()) {
    unsigned QueuePtrReg = Info.addQueuePtr(TRI);
    MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(QueuePtrReg);
  }

  if (Info.hasKernargSegmentPtr()) {
    MachineRegisterInfo &MRI = MF.getRegInfo();
    Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
    const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
    Register VReg = MRI.createGenericVirtualRegister(P4);
    MRI.addLiveIn(InputPtrReg, VReg);
    B.getMBB().addLiveIn(InputPtrReg);
    B.buildCopy(VReg, InputPtrReg);
    CCInfo.AllocateReg(InputPtrReg);
  }

  if (Info.hasDispatchID()) {
    unsigned DispatchIDReg = Info.addDispatchID(TRI);
    MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchIDReg);
  }

  if (Info.hasFlatScratchInit()) {
    unsigned FlatScratchInitReg = Info.addFlatScratchInit(TRI);
    MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(FlatScratchInitReg);
  }

  // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
  // these from the dispatch pointer.
}

bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
    MachineIRBuilder &B, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  MachineFunction &MF = B.getMF();
  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();

  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

  allocateHSAUserSGPRs(CCInfo, B, MF, *TRI, *Info);

  unsigned i = 0;
  const unsigned KernArgBaseAlign = 16;
  const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
  uint64_t ExplicitArgOffset = 0;

  // TODO: Align down to dword alignment and extract bits for extending loads.
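  //
  // Each argument is placed at the next offset past the previous argument
  // that satisfies its ABI alignment. For example (assuming a zero base
  // offset), kernel arguments (i32, i8, i64) would be placed at offsets 0, 4
  // and 8 respectively.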
  for (auto &Arg : F.args()) {
    Type *ArgTy = Arg.getType();
    unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
    if (AllocSize == 0)
      continue;

    unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);

    uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
    ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;

    ArrayRef<Register> OrigArgRegs = VRegs[i];
    Register ArgReg =
      OrigArgRegs.size() == 1
      ? OrigArgRegs[0]
      : MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL));
    unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
    ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
    lowerParameter(B, ArgTy, ArgOffset, Align, ArgReg);
    if (OrigArgRegs.size() > 1)
      unpackRegs(OrigArgRegs, ArgReg, ArgTy, B);
    ++i;
  }

  TLI.allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
  TLI.allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
  return true;
}

/// Pack values \p SrcRegs to cover the vector type result \p DstRegs.
static MachineInstrBuilder mergeVectorRegsToResultRegs(
  MachineIRBuilder &B, ArrayRef<Register> DstRegs, ArrayRef<Register> SrcRegs) {
  MachineRegisterInfo &MRI = *B.getMRI();
  LLT LLTy = MRI.getType(DstRegs[0]);
  LLT PartLLT = MRI.getType(SrcRegs[0]);

  // Deal with v3s16 split into v2s16
  LLT LCMTy = getLCMType(LLTy, PartLLT);
  if (LCMTy == LLTy) {
    // Common case where no padding is needed.
    assert(DstRegs.size() == 1);
    return B.buildConcatVectors(DstRegs[0], SrcRegs);
  }

  const int NumWide = LCMTy.getSizeInBits() / PartLLT.getSizeInBits();
  Register Undef = B.buildUndef(PartLLT).getReg(0);

  // Build vector of undefs.
  SmallVector<Register, 8> WidenedSrcs(NumWide, Undef);

  // Replace the first sources with the real registers.
  std::copy(SrcRegs.begin(), SrcRegs.end(), WidenedSrcs.begin());

  auto Widened = B.buildConcatVectors(LCMTy, WidenedSrcs);
  int NumDst = LCMTy.getSizeInBits() / LLTy.getSizeInBits();

  SmallVector<Register, 8> PadDstRegs(NumDst);
  std::copy(DstRegs.begin(), DstRegs.end(), PadDstRegs.begin());

  // Create the excess dead defs for the unmerge.
  for (int I = DstRegs.size(); I != NumDst; ++I)
    PadDstRegs[I] = MRI.createGenericVirtualRegister(LLTy);

  return B.buildUnmerge(PadDstRegs, Widened);
}

// TODO: Move this to generic code
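// Reassemble the original value in \p OrigRegs from the part registers
// \p Regs produced by argument lowering: merges or truncates scalars,
// concatenates split vectors, and rebuilds vectors whose elements were
// decomposed into smaller registers or promoted to wider ones.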
static void packSplitRegsToOrigType(MachineIRBuilder &B,
                                    ArrayRef<Register> OrigRegs,
                                    ArrayRef<Register> Regs,
                                    LLT LLTy,
                                    LLT PartLLT) {
  MachineRegisterInfo &MRI = *B.getMRI();

  if (!LLTy.isVector() && !PartLLT.isVector()) {
    assert(OrigRegs.size() == 1);
    LLT OrigTy = MRI.getType(OrigRegs[0]);

    unsigned SrcSize = PartLLT.getSizeInBits() * Regs.size();
    if (SrcSize == OrigTy.getSizeInBits())
      B.buildMerge(OrigRegs[0], Regs);
    else {
      auto Widened = B.buildMerge(LLT::scalar(SrcSize), Regs);
      B.buildTrunc(OrigRegs[0], Widened);
    }

    return;
  }

  if (LLTy.isVector() && PartLLT.isVector()) {
    assert(OrigRegs.size() == 1);
    assert(LLTy.getElementType() == PartLLT.getElementType());
    mergeVectorRegsToResultRegs(B, OrigRegs, Regs);
    return;
  }

  assert(LLTy.isVector() && !PartLLT.isVector());

  LLT DstEltTy = LLTy.getElementType();

  // Pointer information was discarded. We'll need to coerce some register
  // types to avoid violating type constraints.
  LLT RealDstEltTy = MRI.getType(OrigRegs[0]).getElementType();

  assert(DstEltTy.getSizeInBits() == RealDstEltTy.getSizeInBits());

  if (DstEltTy == PartLLT) {
    // Vector was trivially scalarized.

    if (RealDstEltTy.isPointer()) {
      for (Register Reg : Regs)
        MRI.setType(Reg, RealDstEltTy);
    }

    B.buildBuildVector(OrigRegs[0], Regs);
  } else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) {
    // Deal with vector with 64-bit elements decomposed to 32-bit
    // registers. Need to create intermediate 64-bit elements.
    SmallVector<Register, 8> EltMerges;
    int PartsPerElt = DstEltTy.getSizeInBits() / PartLLT.getSizeInBits();

    assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0);

    for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) {
      auto Merge = B.buildMerge(RealDstEltTy, Regs.take_front(PartsPerElt));
      // Fix the type in case this is really a vector of pointers.
      MRI.setType(Merge.getReg(0), RealDstEltTy);
      EltMerges.push_back(Merge.getReg(0));
      Regs = Regs.drop_front(PartsPerElt);
    }

    B.buildBuildVector(OrigRegs[0], EltMerges);
  } else {
    // Vector was split, and elements promoted to a wider type.
    LLT BVType = LLT::vector(LLTy.getNumElements(), PartLLT);
    auto BV = B.buildBuildVector(BVType, Regs);
    B.buildTrunc(OrigRegs[0], BV);
  }
}

bool AMDGPUCallLowering::lowerFormalArguments(
    MachineIRBuilder &B, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  CallingConv::ID CC = F.getCallingConv();

  // The infrastructure for normal calling convention lowering is essentially
  // useless for kernels. We want to avoid any kind of legalization or argument
  // splitting.
  if (CC == CallingConv::AMDGPU_KERNEL)
    return lowerFormalArgumentsKernel(B, F, VRegs);

  const bool IsShader = AMDGPU::isShader(CC);
  const bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CC);

  MachineFunction &MF = B.getMF();
  MachineBasicBlock &MBB = B.getMBB();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());

  if (!IsEntryFunc) {
    Register ReturnAddrReg = TRI->getReturnAddressReg(MF);
    Register LiveInReturn = MF.addLiveIn(ReturnAddrReg,
                                         &AMDGPU::SGPR_64RegClass);
    MBB.addLiveIn(ReturnAddrReg);
    B.buildCopy(LiveInReturn, ReturnAddrReg);
  }

  if (Info->hasImplicitBufferPtr()) {
    Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
    MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(ImplicitBufferPtrReg);
  }

  SmallVector<ArgInfo, 32> SplitArgs;
  unsigned Idx = 0;
  unsigned PSInputNum = 0;

  for (auto &Arg : F.args()) {
    if (DL.getTypeStoreSize(Arg.getType()) == 0)
      continue;

    const bool InReg = Arg.hasAttribute(Attribute::InReg);

    // SGPR arguments to functions not implemented.
    if (!IsShader && InReg)
      return false;

    if (Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::Nest))
      return false;

    if (CC == CallingConv::AMDGPU_PS && !InReg && PSInputNum <= 15) {
      const bool ArgUsed = !Arg.use_empty();
      bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum);

      if (!SkipArg) {
        Info->markPSInputAllocated(PSInputNum);
        if (ArgUsed)
          Info->markPSInputEnabled(PSInputNum);
      }

      ++PSInputNum;

      if (SkipArg) {
        for (int I = 0, E = VRegs[Idx].size(); I != E; ++I)
          B.buildUndef(VRegs[Idx][I]);

        ++Idx;
        continue;
      }
    }

    ArgInfo OrigArg(VRegs[Idx], Arg.getType());
    const unsigned OrigArgIdx = Idx + AttributeList::FirstArgIndex;
    setArgFlags(OrigArg, OrigArgIdx, DL, F);

    splitToValueTypes(
      B, OrigArg, OrigArgIdx, SplitArgs, DL, CC,
      // FIXME: We should probably be passing multiple registers to
      // handleAssignments to do this
      [&](ArrayRef<Register> Regs, Register DstReg,
          LLT LLTy, LLT PartLLT, int VTSplitIdx) {
        assert(DstReg == VRegs[Idx][VTSplitIdx]);
        packSplitRegsToOrigType(B, VRegs[Idx][VTSplitIdx], Regs,
                                LLTy, PartLLT);
      });

    ++Idx;
  }

  // At least one interpolation mode must be enabled or else the GPU will
  // hang.
  //
  // Check PSInputAddr instead of PSInputEnable. The idea is that if the user
  // set PSInputAddr, the user wants to enable some bits after the compilation
  // based on run-time states. Since we can't know what the final PSInputEna
  // will look like, we shouldn't do anything here and the user should take
  // responsibility for the correct programming.
  //
  // Otherwise, the following restrictions apply:
  // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
  // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
  //   enabled too.
  if (CC == CallingConv::AMDGPU_PS) {
    if ((Info->getPSInputAddr() & 0x7F) == 0 ||
        ((Info->getPSInputAddr() & 0xF) == 0 &&
         Info->isPSInputAllocated(11))) {
      CCInfo.AllocateReg(AMDGPU::VGPR0);
      CCInfo.AllocateReg(AMDGPU::VGPR1);
      Info->markPSInputAllocated(0);
      Info->markPSInputEnabled(0);
    }

    if (Subtarget.isAmdPalOS()) {
      // For isAmdPalOS, the user does not enable some bits after compilation
      // based on run-time states; the register values being generated here are
      // the final ones set in hardware. Therefore we need to apply the
      // workaround to PSInputAddr and PSInputEnable together. (The case where
      // a bit is set in PSInputAddr but not PSInputEnable is where the frontend
      // set up an input arg for a particular interpolation mode, but nothing
      // uses that input arg. Really we should have an earlier pass that removes
      // such an arg.)
      unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
      if ((PsInputBits & 0x7F) == 0 ||
          ((PsInputBits & 0xF) == 0 &&
           (PsInputBits >> 11 & 1)))
        Info->markPSInputEnabled(
          countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined));
    }
  }

  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC, F.isVarArg());

  if (!MBB.empty())
    B.setInstr(*MBB.begin());

  FormalArgHandler Handler(B, MRI, AssignFn);
  if (!handleAssignments(CCInfo, ArgLocs, B, SplitArgs, Handler))
    return false;

  if (!IsEntryFunc) {
    // Special inputs come after user arguments.
    TLI.allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info);
  }

  // Start adding system SGPRs.
  if (IsEntryFunc) {
    TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsShader);
  } else {
    CCInfo.AllocateReg(Info->getScratchRSrcReg());
    TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
  }

  // Move back to the end of the basic block.
  B.setMBB(MBB);

  return true;
}