//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUCallLowering.h"
#include "AMDGPU.h"
#include "AMDGPUISelLowering.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/LowLevelTypeImpl.h"

using namespace llvm;

namespace {

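// Handler for outgoing return values. Each split value is copied into the
// physical register chosen by the calling convention, and that register is
// added as a use of the return instruction being built. Stack-based return
// locations are not implemented yet, so those paths are unreachable.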
struct OutgoingArgHandler : public CallLowering::ValueHandler {
  OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                     MachineInstrBuilder MIB, CCAssignFn *AssignFn)
      : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}

  MachineInstrBuilder MIB;

  unsigned getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
                        CCValAssign &VA) override {
    MIB.addUse(PhysReg);
    MIRBuilder.buildCopy(PhysReg, ValVReg);
  }

  bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo,
                 const CallLowering::ArgInfo &Info,
                 CCState &State) override {
    return AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
  }
};

} // end anonymous namespace

AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
  : CallLowering(&TLI) {
}

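// Lower an IR return. A missing return value lowers directly to S_ENDPGM;
// shader calling conventions assign their return values to registers through
// OutgoingArgHandler and terminate with SI_RETURN_TO_EPILOG. Other calling
// conventions are not handled here yet and report failure so GlobalISel can
// fall back.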
bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                     const Value *Val,
                                     ArrayRef<unsigned> VRegs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MFI->setIfReturnsVoid(!Val);

  if (!Val) {
    MIRBuilder.buildInstr(AMDGPU::S_ENDPGM).addImm(0);
    return true;
  }

  unsigned VReg = VRegs[0];

  const Function &F = MF.getFunction();
  auto &DL = F.getParent()->getDataLayout();
  if (!AMDGPU::isShader(F.getCallingConv()))
    return false;

  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
  SmallVector<EVT, 4> SplitVTs;
  SmallVector<uint64_t, 4> Offsets;
  ArgInfo OrigArg{VReg, Val->getType()};
  setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);

  SmallVector<ArgInfo, 8> SplitArgs;
  CCAssignFn *AssignFn = CCAssignFnForReturn(F.getCallingConv(), false);
  for (unsigned i = 0, e = Offsets.size(); i != e; ++i) {
    Type *SplitTy = SplitVTs[i].getTypeForEVT(F.getContext());
    SplitArgs.push_back({VRegs[i], SplitTy, OrigArg.Flags, OrigArg.IsFixed});
  }
  auto RetInstr = MIRBuilder.buildInstrNoInsert(AMDGPU::SI_RETURN_TO_EPILOG);
  OutgoingArgHandler Handler(MIRBuilder, MRI, RetInstr, AssignFn);
  if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
    return false;
  MIRBuilder.insertInstr(RetInstr);

  return true;
}

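// Compute a pointer to the kernel argument at byte Offset: a pointer add
// (built with buildGEP) of Offset from the preloaded kernarg segment pointer,
// typed as a constant address space pointer.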
unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
                                               Type *ParamTy,
                                               uint64_t Offset) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  LLT PtrType = getLLTForType(*PtrTy, DL);
  unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
  unsigned KernArgSegmentPtr =
    MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  unsigned KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);

  unsigned OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
  MIRBuilder.buildConstant(OffsetReg, Offset);

  MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);

  return DstReg;
}

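// Load a single kernel argument of type ParamTy from the kernarg segment at
// the given byte Offset into DstReg. The load is marked invariant and
// non-temporal, since kernel arguments are read-only for the duration of a
// dispatch.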
void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
                                        Type *ParamTy, uint64_t Offset,
                                        unsigned Align,
                                        unsigned DstReg) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
  unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);

  MachineMemOperand *MMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
                                       MachineMemOperand::MONonTemporal |
                                       MachineMemOperand::MOInvariant,
                                       TypeSize, Align);

  MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
}

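// Lower the incoming arguments of a function. Special input SGPRs are
// allocated first; AMDGPU_KERNEL arguments are then loaded directly from the
// kernarg segment, while AMDGPU_VS and AMDGPU_PS arguments are bound to the
// registers chosen by the calling convention. Any other calling convention
// reports failure so GlobalISel can fall back.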
bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
                                              const Function &F,
                                              ArrayRef<unsigned> VRegs) const {
  // AMDGPU_GS and AMDGPU_HS are not supported yet.
  if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
      F.getCallingConv() == CallingConv::AMDGPU_HS)
    return false;

  MachineFunction &MF = MIRBuilder.getMF();
  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

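  // Reserve the system SGPRs that are preloaded for this function (private
  // segment buffer, dispatch pointer, queue pointer, kernarg segment pointer,
  // dispatch ID, flat scratch init) so they are not reused for explicit
  // arguments.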
  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
  if (Info->hasPrivateSegmentBuffer()) {
    unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
    CCInfo.AllocateReg(PrivateSegmentBufferReg);
  }

  if (Info->hasDispatchPtr()) {
    unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
    // FIXME: Need to add reg as live-in
    CCInfo.AllocateReg(DispatchPtrReg);
  }

  if (Info->hasQueuePtr()) {
    unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
    // FIXME: Need to add reg as live-in
    CCInfo.AllocateReg(QueuePtrReg);
  }

  if (Info->hasKernargSegmentPtr()) {
    unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
    const LLT P2 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
    unsigned VReg = MRI.createGenericVirtualRegister(P2);
    MRI.addLiveIn(InputPtrReg, VReg);
    MIRBuilder.getMBB().addLiveIn(InputPtrReg);
    MIRBuilder.buildCopy(VReg, InputPtrReg);
    CCInfo.AllocateReg(InputPtrReg);
  }

  if (Info->hasDispatchID()) {
    unsigned DispatchIDReg = Info->addDispatchID(*TRI);
    // FIXME: Need to add reg as live-in
    CCInfo.AllocateReg(DispatchIDReg);
  }

  if (Info->hasFlatScratchInit()) {
    unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
    // FIXME: Need to add reg as live-in
    CCInfo.AllocateReg(FlatScratchInitReg);
  }

  // The infrastructure for normal calling convention lowering is essentially
  // useless for kernels. We want to avoid any kind of legalization or argument
  // splitting.
  if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) {
    unsigned i = 0;
    const unsigned KernArgBaseAlign = 16;
    const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
    uint64_t ExplicitArgOffset = 0;

    // TODO: Align down to dword alignment and extract bits for extending loads.
    for (auto &Arg : F.args()) {
      Type *ArgTy = Arg.getType();
      unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
      if (AllocSize == 0)
        continue;

      unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);

      uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
      ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
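
      // For example, assuming BaseOffset is 0 and the arguments are (i32,
      // i64): the i32 is loaded from offset 0 and ExplicitArgOffset becomes 4;
      // the i64 is aligned up to 8, so it is loaded from offset 8 and
      // ExplicitArgOffset becomes 16. The known alignments relative to the
      // 16-byte kernarg base are then MinAlign(16, 0) == 16 and
      // MinAlign(16, 8) == 8.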
      unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
      lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, VRegs[i]);
      ++i;
    }

    return true;
  }

  unsigned NumArgs = F.arg_size();
  Function::const_arg_iterator CurOrigArg = F.arg_begin();
  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
  unsigned PSInputNum = 0;
  BitVector Skipped(NumArgs);
  for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
    EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType());

    // We can only handle simple value types at the moment.
    ISD::ArgFlagsTy Flags;
    ArgInfo OrigArg{VRegs[i], CurOrigArg->getType()};
    setArgFlags(OrigArg, i + 1, DL, F);
    Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));

    if (F.getCallingConv() == CallingConv::AMDGPU_PS &&
        !OrigArg.Flags.isInReg() && !OrigArg.Flags.isByVal() &&
        PSInputNum <= 15) {
      if (CurOrigArg->use_empty() && !Info->isPSInputAllocated(PSInputNum)) {
        Skipped.set(i);
        ++PSInputNum;
        continue;
      }

      Info->markPSInputAllocated(PSInputNum);
      if (!CurOrigArg->use_empty())
        Info->markPSInputEnabled(PSInputNum);

      ++PSInputNum;
    }

    CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
                                             /*IsVarArg=*/false);

    if (ValEVT.isVector()) {
      if (!ValEVT.isSimple())
        return false;
      MVT ValVT = ValEVT.getVectorElementType().getSimpleVT();
      bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full,
                          OrigArg.Flags, CCInfo);

      // Fail if we don't know how to handle this type.
      if (Res)
        return false;
    } else {
      if (!ValEVT.isSimple())
        return false;
      MVT ValVT = ValEVT.getSimpleVT();
      bool Res =
          AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo);

      // Fail if we don't know how to handle this type.
      if (Res)
        return false;
    }
  }

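  // For the shader entry points handled below, every non-skipped argument is
  // expected to have been assigned a register, so binding the arguments just
  // means marking each assigned register live-in and copying it into the
  // argument's virtual register.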
  Function::const_arg_iterator Arg = F.arg_begin();

  if (F.getCallingConv() == CallingConv::AMDGPU_VS ||
      F.getCallingConv() == CallingConv::AMDGPU_PS) {
    for (unsigned i = 0, OrigArgIdx = 0;
         OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) {
      if (Skipped.test(OrigArgIdx))
        continue;
      CCValAssign &VA = ArgLocs[i++];
      MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx]);
      MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
      MIRBuilder.buildCopy(VRegs[OrigArgIdx], VA.getLocReg());
    }
    return true;
  }

  return false;
}