1 //===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// This file implements the lowering of LLVM calls to machine code calls for
12 /// GlobalISel.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "AMDGPUCallLowering.h"
17 #include "AMDGPU.h"
18 #include "AMDGPUISelLowering.h"
19 #include "AMDGPUSubtarget.h"
20 #include "SIISelLowering.h"
21 #include "SIMachineFunctionInfo.h"
22 #include "SIRegisterInfo.h"
23 #include "llvm/CodeGen/CallingConvLower.h"
24 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
25 #include "llvm/CodeGen/MachineInstrBuilder.h"
26 
27 using namespace llvm;
28 
29 AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
30   : CallLowering(&TLI), AMDGPUASI(TLI.getAMDGPUAS()) {
31 }
32 
33 bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
34                                      const Value *Val, unsigned VReg) const {
35   // FIXME: Add support for non-void returns.
36   if (Val)
37     return false;
38 
39   MIRBuilder.buildInstr(AMDGPU::S_ENDPGM);
40   return true;
41 }
42 
43 unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
44                                                Type *ParamTy,
45                                                unsigned Offset) const {
46 
47   MachineFunction &MF = MIRBuilder.getMF();
48   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
49   MachineRegisterInfo &MRI = MF.getRegInfo();
50   const Function &F = MF.getFunction();
51   const DataLayout &DL = F.getParent()->getDataLayout();
52   PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
53   LLT PtrType = getLLTForType(*PtrTy, DL);
54   unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
55   unsigned KernArgSegmentPtr =
56     MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
57   unsigned KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);
58 
59   unsigned OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
60   MIRBuilder.buildConstant(OffsetReg, Offset);
61 
62   MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);
63 
64   return DstReg;
65 }
66 
67 void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
68                                         Type *ParamTy, unsigned Offset,
69                                         unsigned DstReg) const {
70   MachineFunction &MF = MIRBuilder.getMF();
71   const Function &F = MF.getFunction();
72   const DataLayout &DL = F.getParent()->getDataLayout();
73   PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
74   MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
75   unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
76   unsigned Align = DL.getABITypeAlignment(ParamTy);
77   unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);
78 
79   MachineMemOperand *MMO =
80       MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
81                                        MachineMemOperand::MONonTemporal |
82                                        MachineMemOperand::MOInvariant,
83                                        TypeSize, Align);
84 
85   MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
86 }
87 
88 bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
89                                               const Function &F,
90                                               ArrayRef<unsigned> VRegs) const {
91   // AMDGPU_GS and AMDGP_HS are not supported yet.
92   if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
93       F.getCallingConv() == CallingConv::AMDGPU_HS)
94     return false;
95 
96   MachineFunction &MF = MIRBuilder.getMF();
97   const SISubtarget *Subtarget = static_cast<const SISubtarget *>(&MF.getSubtarget());
98   MachineRegisterInfo &MRI = MF.getRegInfo();
99   SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
100   const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
101   const DataLayout &DL = F.getParent()->getDataLayout();
102 
103   SmallVector<CCValAssign, 16> ArgLocs;
104   CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
105 
106   // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
107   if (Info->hasPrivateSegmentBuffer()) {
108     unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
109     MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
110     CCInfo.AllocateReg(PrivateSegmentBufferReg);
111   }
112 
113   if (Info->hasDispatchPtr()) {
114     unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
115     // FIXME: Need to add reg as live-in
116     CCInfo.AllocateReg(DispatchPtrReg);
117   }
118 
119   if (Info->hasQueuePtr()) {
120     unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
121     // FIXME: Need to add reg as live-in
122     CCInfo.AllocateReg(QueuePtrReg);
123   }
124 
125   if (Info->hasKernargSegmentPtr()) {
126     unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
127     const LLT P2 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
128     unsigned VReg = MRI.createGenericVirtualRegister(P2);
129     MRI.addLiveIn(InputPtrReg, VReg);
130     MIRBuilder.getMBB().addLiveIn(InputPtrReg);
131     MIRBuilder.buildCopy(VReg, InputPtrReg);
132     CCInfo.AllocateReg(InputPtrReg);
133   }
134 
135   if (Info->hasDispatchID()) {
136     unsigned DispatchIDReg = Info->addDispatchID(*TRI);
137     // FIXME: Need to add reg as live-in
138     CCInfo.AllocateReg(DispatchIDReg);
139   }
140 
141   if (Info->hasFlatScratchInit()) {
142     unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
143     // FIXME: Need to add reg as live-in
144     CCInfo.AllocateReg(FlatScratchInitReg);
145   }
146 
147   unsigned NumArgs = F.arg_size();
148   Function::const_arg_iterator CurOrigArg = F.arg_begin();
149   const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
150   unsigned PSInputNum = 0;
151   BitVector Skipped(NumArgs);
152   for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
153     EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType());
154 
155     // We can only hanlde simple value types at the moment.
156     ISD::ArgFlagsTy Flags;
157     ArgInfo OrigArg{VRegs[i], CurOrigArg->getType()};
158     setArgFlags(OrigArg, i + 1, DL, F);
159     Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));
160 
161     if (F.getCallingConv() == CallingConv::AMDGPU_PS &&
162         !OrigArg.Flags.isInReg() && !OrigArg.Flags.isByVal() &&
163         PSInputNum <= 15) {
164       if (CurOrigArg->use_empty() && !Info->isPSInputAllocated(PSInputNum)) {
165         Skipped.set(i);
166         ++PSInputNum;
167         continue;
168       }
169 
170       Info->markPSInputAllocated(PSInputNum);
171       if (!CurOrigArg->use_empty())
172         Info->markPSInputEnabled(PSInputNum);
173 
174       ++PSInputNum;
175     }
176 
177     CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
178                                              /*IsVarArg=*/false);
179 
180     if (ValEVT.isVector()) {
181       EVT ElemVT = ValEVT.getVectorElementType();
182       if (!ValEVT.isSimple())
183         return false;
184       MVT ValVT = ElemVT.getSimpleVT();
185       bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full,
186                           OrigArg.Flags, CCInfo);
187       if (!Res)
188         return false;
189     } else {
190       MVT ValVT = ValEVT.getSimpleVT();
191       if (!ValEVT.isSimple())
192         return false;
193       bool Res =
194           AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo);
195 
196       // Fail if we don't know how to handle this type.
197       if (Res)
198         return false;
199     }
200   }
201 
202   Function::const_arg_iterator Arg = F.arg_begin();
203 
204   if (F.getCallingConv() == CallingConv::AMDGPU_VS ||
205       F.getCallingConv() == CallingConv::AMDGPU_PS) {
206     for (unsigned i = 0, OrigArgIdx = 0;
207          OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) {
208        if (Skipped.test(OrigArgIdx))
209           continue;
210       CCValAssign &VA = ArgLocs[i++];
211       MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx]);
212       MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
213       MIRBuilder.buildCopy(VRegs[OrigArgIdx], VA.getLocReg());
214     }
215     return true;
216   }
217 
218   for (unsigned i = 0; i != ArgLocs.size(); ++i, ++Arg) {
219     // FIXME: We should be getting DebugInfo from the arguments some how.
220     CCValAssign &VA = ArgLocs[i];
221     lowerParameter(MIRBuilder, Arg->getType(),
222                    VA.getLocMemOffset() +
223                    Subtarget->getExplicitKernelArgOffset(MF), VRegs[i]);
224   }
225 
226   return true;
227 }
228