//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUCallLowering.h"
#include "AMDGPU.h"
#include "AMDGPUISelLowering.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

using namespace llvm;

AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
  : CallLowering(&TLI) {
}

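// Lower an IR return. Only void returns are handled for now; anything else
// reports failure so GlobalISel can fall back to the DAG path.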
bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                     const Value *Val,
                                     ArrayRef<unsigned> VRegs) const {
  // FIXME: Add support for non-void returns.
  if (Val)
    return false;

  MIRBuilder.buildInstr(AMDGPU::S_ENDPGM);
  return true;
}

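// Compute a pointer to the kernel argument at the given byte offset, by
// adding a constant offset to the preloaded kernarg segment pointer.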
unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
                                               Type *ParamTy,
                                               uint64_t Offset) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  LLT PtrType = getLLTForType(*PtrTy, DL);
  unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
  unsigned KernArgSegmentPtr =
    MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  unsigned KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);

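  // Materialize the byte offset and add it to the kernarg segment pointer.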
  unsigned OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
  MIRBuilder.buildConstant(OffsetReg, Offset);

  MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);

  return DstReg;
}

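// Emit a load of the kernel argument of type ParamTy at the given byte offset
// within the kernarg segment into DstReg.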
void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
                                        Type *ParamTy, uint64_t Offset,
                                        unsigned Align,
                                        unsigned DstReg) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
  unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);

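  // Kernel arguments are never written, so the load can be marked invariant.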
  MachineMemOperand *MMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
                                       MachineMemOperand::MONonTemporal |
                                       MachineMemOperand::MOInvariant,
                                       TypeSize, Align);

  MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
}

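// Lower the incoming formal arguments. Kernel arguments are loaded from the
// kernarg segment; shader arguments are received in registers.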
bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
                                              const Function &F,
                                              ArrayRef<unsigned> VRegs) const {
  // AMDGPU_GS and AMDGPU_HS are not supported yet.
  if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
      F.getCallingConv() == CallingConv::AMDGPU_HS)
    return false;

  MachineFunction &MF = MIRBuilder.getMF();
  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

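  // Reserve the preloaded system SGPRs this function uses so the calling
  // convention does not assign user arguments to them.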
  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
  if (Info->hasPrivateSegmentBuffer()) {
    unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
    CCInfo.AllocateReg(PrivateSegmentBufferReg);
  }

  if (Info->hasDispatchPtr()) {
    unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
    // FIXME: Need to add reg as live-in
    CCInfo.AllocateReg(DispatchPtrReg);
  }

  if (Info->hasQueuePtr()) {
    unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
    // FIXME: Need to add reg as live-in
    CCInfo.AllocateReg(QueuePtrReg);
  }

  if (Info->hasKernargSegmentPtr()) {
    unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
    const LLT ConstPtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
    unsigned VReg = MRI.createGenericVirtualRegister(ConstPtrTy);
    MRI.addLiveIn(InputPtrReg, VReg);
    MIRBuilder.getMBB().addLiveIn(InputPtrReg);
    MIRBuilder.buildCopy(VReg, InputPtrReg);
    CCInfo.AllocateReg(InputPtrReg);
  }

  if (Info->hasDispatchID()) {
    unsigned DispatchIDReg = Info->addDispatchID(*TRI);
    // FIXME: Need to add reg as live-in
    CCInfo.AllocateReg(DispatchIDReg);
  }

  if (Info->hasFlatScratchInit()) {
    unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
    // FIXME: Need to add reg as live-in
    CCInfo.AllocateReg(FlatScratchInitReg);
  }

  // The infrastructure for normal calling convention lowering is essentially
  // useless for kernels. We want to avoid any kind of legalization or argument
  // splitting.
  if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) {
    unsigned i = 0;
    const unsigned KernArgBaseAlign = 16;
    const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
    uint64_t ExplicitArgOffset = 0;

    // TODO: Align down to dword alignment and extract bits for extending loads.
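    // Arguments are laid out back to back at their ABI alignment within the
    // explicit kernarg segment. The segment itself is only known to be
    // KernArgBaseAlign-aligned, so the provable alignment of each load is
    // MinAlign(KernArgBaseAlign, offset).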
    for (auto &Arg : F.args()) {
      Type *ArgTy = Arg.getType();
      unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
      if (AllocSize == 0)
        continue;

      unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);

      uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
      ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;

      unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
      ArgOffset = alignTo(ArgOffset, ABIAlign);
      lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, VRegs[i]);
      ++i;
    }

    return true;
  }

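  // Shader calling conventions: run calling convention analysis over the
  // arguments, tracking unused pixel shader inputs so they can be skipped.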
  unsigned NumArgs = F.arg_size();
  Function::const_arg_iterator CurOrigArg = F.arg_begin();
  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
  unsigned PSInputNum = 0;
  BitVector Skipped(NumArgs);
  for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
    EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType());

    ISD::ArgFlagsTy Flags;
    ArgInfo OrigArg{VRegs[i], CurOrigArg->getType()};
    setArgFlags(OrigArg, i + 1, DL, F);
    Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));

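    // For pixel shaders, record which PS inputs are actually used. An input
    // with no uses can be skipped entirely so the hardware does not have to
    // initialize it.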
    if (F.getCallingConv() == CallingConv::AMDGPU_PS &&
        !OrigArg.Flags.isInReg() && !OrigArg.Flags.isByVal() &&
        PSInputNum <= 15) {
      if (CurOrigArg->use_empty() && !Info->isPSInputAllocated(PSInputNum)) {
        Skipped.set(i);
        ++PSInputNum;
        continue;
      }

      Info->markPSInputAllocated(PSInputNum);
      if (!CurOrigArg->use_empty())
        Info->markPSInputEnabled(PSInputNum);

      ++PSInputNum;
    }

    CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
                                             /*IsVarArg=*/false);

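    // Note that a CCAssignFn returns true when it *fails* to assign a
    // location for the value.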
    // We can only handle simple value types at the moment.
    if (ValEVT.isVector()) {
      if (!ValEVT.isSimple())
        return false;
      EVT ElemVT = ValEVT.getVectorElementType();
      MVT ValVT = ElemVT.getSimpleVT();
      bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full,
                          OrigArg.Flags, CCInfo);

      // Fail if we don't know how to handle this type.
      if (Res)
        return false;
    } else {
      if (!ValEVT.isSimple())
        return false;
      MVT ValVT = ValEVT.getSimpleVT();
      bool Res =
          AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo);

      // Fail if we don't know how to handle this type.
      if (Res)
        return false;
    }
  }

  Function::const_arg_iterator Arg = F.arg_begin();

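  // All locations were assigned; bind each incoming physical register to its
  // argument's virtual register, skipping the PS inputs elided above.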
  if (F.getCallingConv() == CallingConv::AMDGPU_VS ||
      F.getCallingConv() == CallingConv::AMDGPU_PS) {
    for (unsigned i = 0, OrigArgIdx = 0;
         OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) {
      if (Skipped.test(OrigArgIdx))
        continue;
      CCValAssign &VA = ArgLocs[i++];
      MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx]);
      MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
      MIRBuilder.buildCopy(VRegs[OrigArgIdx], VA.getLocReg());
    }
    return true;
  }
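  // Remaining calling conventions are not supported yet.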
  return false;
}