1 //===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// This file implements the lowering of LLVM calls to machine code calls for
12 /// GlobalISel.
13 ///
14 //===----------------------------------------------------------------------===//
15
16 #include "AMDGPUCallLowering.h"
17 #include "AMDGPU.h"
18 #include "AMDGPUISelLowering.h"
19 #include "AMDGPUSubtarget.h"
20 #include "SIISelLowering.h"
21 #include "SIMachineFunctionInfo.h"
22 #include "SIRegisterInfo.h"
23 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
24 #include "llvm/CodeGen/CallingConvLower.h"
25 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27
28 using namespace llvm;
29
AMDGPUCallLowering(const AMDGPUTargetLowering & TLI)30 AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
31 : CallLowering(&TLI) {
32 }
33
lowerReturn(MachineIRBuilder & MIRBuilder,const Value * Val,ArrayRef<unsigned> VRegs) const34 bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
35 const Value *Val,
36 ArrayRef<unsigned> VRegs) const {
37 // FIXME: Add support for non-void returns.
38 if (Val)
39 return false;
40
41 MIRBuilder.buildInstr(AMDGPU::S_ENDPGM);
42 return true;
43 }
44
lowerParameterPtr(MachineIRBuilder & MIRBuilder,Type * ParamTy,uint64_t Offset) const45 unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
46 Type *ParamTy,
47 uint64_t Offset) const {
48
49 MachineFunction &MF = MIRBuilder.getMF();
50 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
51 MachineRegisterInfo &MRI = MF.getRegInfo();
52 const Function &F = MF.getFunction();
53 const DataLayout &DL = F.getParent()->getDataLayout();
54 PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
55 LLT PtrType = getLLTForType(*PtrTy, DL);
56 unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
57 unsigned KernArgSegmentPtr =
58 MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
59 unsigned KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);
60
61 unsigned OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
62 MIRBuilder.buildConstant(OffsetReg, Offset);
63
64 MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);
65
66 return DstReg;
67 }
68
lowerParameter(MachineIRBuilder & MIRBuilder,Type * ParamTy,uint64_t Offset,unsigned Align,unsigned DstReg) const69 void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
70 Type *ParamTy, uint64_t Offset,
71 unsigned Align,
72 unsigned DstReg) const {
73 MachineFunction &MF = MIRBuilder.getMF();
74 const Function &F = MF.getFunction();
75 const DataLayout &DL = F.getParent()->getDataLayout();
76 PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
77 MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
78 unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
79 unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);
80
81 MachineMemOperand *MMO =
82 MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
83 MachineMemOperand::MONonTemporal |
84 MachineMemOperand::MOInvariant,
85 TypeSize, Align);
86
87 MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
88 }
89
lowerFormalArguments(MachineIRBuilder & MIRBuilder,const Function & F,ArrayRef<unsigned> VRegs) const90 bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
91 const Function &F,
92 ArrayRef<unsigned> VRegs) const {
93 // AMDGPU_GS and AMDGP_HS are not supported yet.
94 if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
95 F.getCallingConv() == CallingConv::AMDGPU_HS)
96 return false;
97
98 MachineFunction &MF = MIRBuilder.getMF();
99 const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
100 MachineRegisterInfo &MRI = MF.getRegInfo();
101 SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
102 const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
103 const DataLayout &DL = F.getParent()->getDataLayout();
104
105 SmallVector<CCValAssign, 16> ArgLocs;
106 CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
107
108 // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
109 if (Info->hasPrivateSegmentBuffer()) {
110 unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
111 MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
112 CCInfo.AllocateReg(PrivateSegmentBufferReg);
113 }
114
115 if (Info->hasDispatchPtr()) {
116 unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
117 // FIXME: Need to add reg as live-in
118 CCInfo.AllocateReg(DispatchPtrReg);
119 }
120
121 if (Info->hasQueuePtr()) {
122 unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
123 // FIXME: Need to add reg as live-in
124 CCInfo.AllocateReg(QueuePtrReg);
125 }
126
127 if (Info->hasKernargSegmentPtr()) {
128 unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
129 const LLT P2 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
130 unsigned VReg = MRI.createGenericVirtualRegister(P2);
131 MRI.addLiveIn(InputPtrReg, VReg);
132 MIRBuilder.getMBB().addLiveIn(InputPtrReg);
133 MIRBuilder.buildCopy(VReg, InputPtrReg);
134 CCInfo.AllocateReg(InputPtrReg);
135 }
136
137 if (Info->hasDispatchID()) {
138 unsigned DispatchIDReg = Info->addDispatchID(*TRI);
139 // FIXME: Need to add reg as live-in
140 CCInfo.AllocateReg(DispatchIDReg);
141 }
142
143 if (Info->hasFlatScratchInit()) {
144 unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
145 // FIXME: Need to add reg as live-in
146 CCInfo.AllocateReg(FlatScratchInitReg);
147 }
148
149 // The infrastructure for normal calling convention lowering is essentially
150 // useless for kernels. We want to avoid any kind of legalization or argument
151 // splitting.
152 if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) {
153 unsigned i = 0;
154 const unsigned KernArgBaseAlign = 16;
155 const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
156 uint64_t ExplicitArgOffset = 0;
157
158 // TODO: Align down to dword alignment and extract bits for extending loads.
159 for (auto &Arg : F.args()) {
160 Type *ArgTy = Arg.getType();
161 unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
162 if (AllocSize == 0)
163 continue;
164
165 unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);
166
167 uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
168 ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
169
170 unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
171 ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
172 lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, VRegs[i]);
173 ++i;
174 }
175
176 return true;
177 }
178
179 unsigned NumArgs = F.arg_size();
180 Function::const_arg_iterator CurOrigArg = F.arg_begin();
181 const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
182 unsigned PSInputNum = 0;
183 BitVector Skipped(NumArgs);
184 for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
185 EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType());
186
187 // We can only hanlde simple value types at the moment.
188 ISD::ArgFlagsTy Flags;
189 ArgInfo OrigArg{VRegs[i], CurOrigArg->getType()};
190 setArgFlags(OrigArg, i + 1, DL, F);
191 Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));
192
193 if (F.getCallingConv() == CallingConv::AMDGPU_PS &&
194 !OrigArg.Flags.isInReg() && !OrigArg.Flags.isByVal() &&
195 PSInputNum <= 15) {
196 if (CurOrigArg->use_empty() && !Info->isPSInputAllocated(PSInputNum)) {
197 Skipped.set(i);
198 ++PSInputNum;
199 continue;
200 }
201
202 Info->markPSInputAllocated(PSInputNum);
203 if (!CurOrigArg->use_empty())
204 Info->markPSInputEnabled(PSInputNum);
205
206 ++PSInputNum;
207 }
208
209 CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
210 /*IsVarArg=*/false);
211
212 if (ValEVT.isVector()) {
213 EVT ElemVT = ValEVT.getVectorElementType();
214 if (!ValEVT.isSimple())
215 return false;
216 MVT ValVT = ElemVT.getSimpleVT();
217 bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full,
218 OrigArg.Flags, CCInfo);
219 if (!Res)
220 return false;
221 } else {
222 MVT ValVT = ValEVT.getSimpleVT();
223 if (!ValEVT.isSimple())
224 return false;
225 bool Res =
226 AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo);
227
228 // Fail if we don't know how to handle this type.
229 if (Res)
230 return false;
231 }
232 }
233
234 Function::const_arg_iterator Arg = F.arg_begin();
235
236 if (F.getCallingConv() == CallingConv::AMDGPU_VS ||
237 F.getCallingConv() == CallingConv::AMDGPU_PS) {
238 for (unsigned i = 0, OrigArgIdx = 0;
239 OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) {
240 if (Skipped.test(OrigArgIdx))
241 continue;
242 CCValAssign &VA = ArgLocs[i++];
243 MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx]);
244 MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
245 MIRBuilder.buildCopy(VRegs[OrigArgIdx], VA.getLocReg());
246 }
247 return true;
248 }
249
250 return false;
251 }
252