//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//
153ca95b02SDimitry Andric 
163ca95b02SDimitry Andric #include "AMDGPUCallLowering.h"
177a7e6055SDimitry Andric #include "AMDGPU.h"
183ca95b02SDimitry Andric #include "AMDGPUISelLowering.h"
197a7e6055SDimitry Andric #include "AMDGPUSubtarget.h"
207a7e6055SDimitry Andric #include "SIISelLowering.h"
217a7e6055SDimitry Andric #include "SIMachineFunctionInfo.h"
22db17bf38SDimitry Andric #include "SIRegisterInfo.h"
234ba319b5SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
247a7e6055SDimitry Andric #include "llvm/CodeGen/CallingConvLower.h"
253ca95b02SDimitry Andric #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
263ca95b02SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
273ca95b02SDimitry Andric 
283ca95b02SDimitry Andric using namespace llvm;
293ca95b02SDimitry Andric 
AMDGPUCallLowering(const AMDGPUTargetLowering & TLI)303ca95b02SDimitry Andric AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
31*b5893f02SDimitry Andric   : CallLowering(&TLI) {
323ca95b02SDimitry Andric }
333ca95b02SDimitry Andric 
lowerReturn(MachineIRBuilder & MIRBuilder,const Value * Val,ArrayRef<unsigned> VRegs) const343ca95b02SDimitry Andric bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
35*b5893f02SDimitry Andric                                      const Value *Val,
36*b5893f02SDimitry Andric                                      ArrayRef<unsigned> VRegs) const {
374ba319b5SDimitry Andric   // FIXME: Add support for non-void returns.
384ba319b5SDimitry Andric   if (Val)
394ba319b5SDimitry Andric     return false;
404ba319b5SDimitry Andric 
417a7e6055SDimitry Andric   MIRBuilder.buildInstr(AMDGPU::S_ENDPGM);
423ca95b02SDimitry Andric   return true;
433ca95b02SDimitry Andric }
443ca95b02SDimitry Andric 
lowerParameterPtr(MachineIRBuilder & MIRBuilder,Type * ParamTy,uint64_t Offset) const457a7e6055SDimitry Andric unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
467a7e6055SDimitry Andric                                                Type *ParamTy,
474ba319b5SDimitry Andric                                                uint64_t Offset) const {
487a7e6055SDimitry Andric 
497a7e6055SDimitry Andric   MachineFunction &MF = MIRBuilder.getMF();
502cab237bSDimitry Andric   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
517a7e6055SDimitry Andric   MachineRegisterInfo &MRI = MF.getRegInfo();
522cab237bSDimitry Andric   const Function &F = MF.getFunction();
537a7e6055SDimitry Andric   const DataLayout &DL = F.getParent()->getDataLayout();
54*b5893f02SDimitry Andric   PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
557a7e6055SDimitry Andric   LLT PtrType = getLLTForType(*PtrTy, DL);
567a7e6055SDimitry Andric   unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
577a7e6055SDimitry Andric   unsigned KernArgSegmentPtr =
582cab237bSDimitry Andric     MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
597a7e6055SDimitry Andric   unsigned KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);
607a7e6055SDimitry Andric 
617a7e6055SDimitry Andric   unsigned OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
627a7e6055SDimitry Andric   MIRBuilder.buildConstant(OffsetReg, Offset);
637a7e6055SDimitry Andric 
647a7e6055SDimitry Andric   MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);
657a7e6055SDimitry Andric 
667a7e6055SDimitry Andric   return DstReg;
677a7e6055SDimitry Andric }
687a7e6055SDimitry Andric 
lowerParameter(MachineIRBuilder & MIRBuilder,Type * ParamTy,uint64_t Offset,unsigned Align,unsigned DstReg) const697a7e6055SDimitry Andric void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
704ba319b5SDimitry Andric                                         Type *ParamTy, uint64_t Offset,
714ba319b5SDimitry Andric                                         unsigned Align,
727a7e6055SDimitry Andric                                         unsigned DstReg) const {
737a7e6055SDimitry Andric   MachineFunction &MF = MIRBuilder.getMF();
742cab237bSDimitry Andric   const Function &F = MF.getFunction();
757a7e6055SDimitry Andric   const DataLayout &DL = F.getParent()->getDataLayout();
76*b5893f02SDimitry Andric   PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
777a7e6055SDimitry Andric   MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
787a7e6055SDimitry Andric   unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
797a7e6055SDimitry Andric   unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);
807a7e6055SDimitry Andric 
817a7e6055SDimitry Andric   MachineMemOperand *MMO =
827a7e6055SDimitry Andric       MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
837a7e6055SDimitry Andric                                        MachineMemOperand::MONonTemporal |
847a7e6055SDimitry Andric                                        MachineMemOperand::MOInvariant,
857a7e6055SDimitry Andric                                        TypeSize, Align);
867a7e6055SDimitry Andric 
877a7e6055SDimitry Andric   MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
887a7e6055SDimitry Andric }
897a7e6055SDimitry Andric 
lowerFormalArguments(MachineIRBuilder & MIRBuilder,const Function & F,ArrayRef<unsigned> VRegs) const90d88c1a5aSDimitry Andric bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
91d88c1a5aSDimitry Andric                                               const Function &F,
92d88c1a5aSDimitry Andric                                               ArrayRef<unsigned> VRegs) const {
934ba319b5SDimitry Andric   // AMDGPU_GS and AMDGP_HS are not supported yet.
944ba319b5SDimitry Andric   if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
954ba319b5SDimitry Andric       F.getCallingConv() == CallingConv::AMDGPU_HS)
964ba319b5SDimitry Andric     return false;
977a7e6055SDimitry Andric 
987a7e6055SDimitry Andric   MachineFunction &MF = MIRBuilder.getMF();
994ba319b5SDimitry Andric   const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
1007a7e6055SDimitry Andric   MachineRegisterInfo &MRI = MF.getRegInfo();
1017a7e6055SDimitry Andric   SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1024ba319b5SDimitry Andric   const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
1037a7e6055SDimitry Andric   const DataLayout &DL = F.getParent()->getDataLayout();
1047a7e6055SDimitry Andric 
1057a7e6055SDimitry Andric   SmallVector<CCValAssign, 16> ArgLocs;
1067a7e6055SDimitry Andric   CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
1077a7e6055SDimitry Andric 
1087a7e6055SDimitry Andric   // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
1097a7e6055SDimitry Andric   if (Info->hasPrivateSegmentBuffer()) {
1107a7e6055SDimitry Andric     unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
1117a7e6055SDimitry Andric     MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
1127a7e6055SDimitry Andric     CCInfo.AllocateReg(PrivateSegmentBufferReg);
1137a7e6055SDimitry Andric   }
1147a7e6055SDimitry Andric 
1157a7e6055SDimitry Andric   if (Info->hasDispatchPtr()) {
1167a7e6055SDimitry Andric     unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
1177a7e6055SDimitry Andric     // FIXME: Need to add reg as live-in
1187a7e6055SDimitry Andric     CCInfo.AllocateReg(DispatchPtrReg);
1197a7e6055SDimitry Andric   }
1207a7e6055SDimitry Andric 
1217a7e6055SDimitry Andric   if (Info->hasQueuePtr()) {
1227a7e6055SDimitry Andric     unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
1237a7e6055SDimitry Andric     // FIXME: Need to add reg as live-in
1247a7e6055SDimitry Andric     CCInfo.AllocateReg(QueuePtrReg);
1257a7e6055SDimitry Andric   }
1267a7e6055SDimitry Andric 
1277a7e6055SDimitry Andric   if (Info->hasKernargSegmentPtr()) {
1287a7e6055SDimitry Andric     unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
1294ba319b5SDimitry Andric     const LLT P2 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
1307a7e6055SDimitry Andric     unsigned VReg = MRI.createGenericVirtualRegister(P2);
1317a7e6055SDimitry Andric     MRI.addLiveIn(InputPtrReg, VReg);
1327a7e6055SDimitry Andric     MIRBuilder.getMBB().addLiveIn(InputPtrReg);
1337a7e6055SDimitry Andric     MIRBuilder.buildCopy(VReg, InputPtrReg);
1347a7e6055SDimitry Andric     CCInfo.AllocateReg(InputPtrReg);
1357a7e6055SDimitry Andric   }
1367a7e6055SDimitry Andric 
1377a7e6055SDimitry Andric   if (Info->hasDispatchID()) {
1387a7e6055SDimitry Andric     unsigned DispatchIDReg = Info->addDispatchID(*TRI);
1397a7e6055SDimitry Andric     // FIXME: Need to add reg as live-in
1407a7e6055SDimitry Andric     CCInfo.AllocateReg(DispatchIDReg);
1417a7e6055SDimitry Andric   }
1427a7e6055SDimitry Andric 
1437a7e6055SDimitry Andric   if (Info->hasFlatScratchInit()) {
1447a7e6055SDimitry Andric     unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
1457a7e6055SDimitry Andric     // FIXME: Need to add reg as live-in
1467a7e6055SDimitry Andric     CCInfo.AllocateReg(FlatScratchInitReg);
1477a7e6055SDimitry Andric   }
1487a7e6055SDimitry Andric 
1494ba319b5SDimitry Andric   // The infrastructure for normal calling convention lowering is essentially
1504ba319b5SDimitry Andric   // useless for kernels. We want to avoid any kind of legalization or argument
1514ba319b5SDimitry Andric   // splitting.
1524ba319b5SDimitry Andric   if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) {
1534ba319b5SDimitry Andric     unsigned i = 0;
1544ba319b5SDimitry Andric     const unsigned KernArgBaseAlign = 16;
1554ba319b5SDimitry Andric     const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
1564ba319b5SDimitry Andric     uint64_t ExplicitArgOffset = 0;
1574ba319b5SDimitry Andric 
1584ba319b5SDimitry Andric     // TODO: Align down to dword alignment and extract bits for extending loads.
1594ba319b5SDimitry Andric     for (auto &Arg : F.args()) {
1604ba319b5SDimitry Andric       Type *ArgTy = Arg.getType();
1614ba319b5SDimitry Andric       unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
1624ba319b5SDimitry Andric       if (AllocSize == 0)
1634ba319b5SDimitry Andric         continue;
1644ba319b5SDimitry Andric 
1654ba319b5SDimitry Andric       unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);
1664ba319b5SDimitry Andric 
1674ba319b5SDimitry Andric       uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
1684ba319b5SDimitry Andric       ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
1694ba319b5SDimitry Andric 
1704ba319b5SDimitry Andric       unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
1714ba319b5SDimitry Andric       ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
1724ba319b5SDimitry Andric       lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, VRegs[i]);
1734ba319b5SDimitry Andric       ++i;
1744ba319b5SDimitry Andric     }
1754ba319b5SDimitry Andric 
1764ba319b5SDimitry Andric     return true;
1774ba319b5SDimitry Andric   }
1784ba319b5SDimitry Andric 
1797a7e6055SDimitry Andric   unsigned NumArgs = F.arg_size();
1807a7e6055SDimitry Andric   Function::const_arg_iterator CurOrigArg = F.arg_begin();
1817a7e6055SDimitry Andric   const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
1824ba319b5SDimitry Andric   unsigned PSInputNum = 0;
1834ba319b5SDimitry Andric   BitVector Skipped(NumArgs);
1847a7e6055SDimitry Andric   for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
1852cab237bSDimitry Andric     EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType());
1862cab237bSDimitry Andric 
1872cab237bSDimitry Andric     // We can only hanlde simple value types at the moment.
1887a7e6055SDimitry Andric     ISD::ArgFlagsTy Flags;
1892cab237bSDimitry Andric     ArgInfo OrigArg{VRegs[i], CurOrigArg->getType()};
1902cab237bSDimitry Andric     setArgFlags(OrigArg, i + 1, DL, F);
1917a7e6055SDimitry Andric     Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));
1924ba319b5SDimitry Andric 
1934ba319b5SDimitry Andric     if (F.getCallingConv() == CallingConv::AMDGPU_PS &&
1944ba319b5SDimitry Andric         !OrigArg.Flags.isInReg() && !OrigArg.Flags.isByVal() &&
1954ba319b5SDimitry Andric         PSInputNum <= 15) {
1964ba319b5SDimitry Andric       if (CurOrigArg->use_empty() && !Info->isPSInputAllocated(PSInputNum)) {
1974ba319b5SDimitry Andric         Skipped.set(i);
1984ba319b5SDimitry Andric         ++PSInputNum;
1994ba319b5SDimitry Andric         continue;
2004ba319b5SDimitry Andric       }
2014ba319b5SDimitry Andric 
2024ba319b5SDimitry Andric       Info->markPSInputAllocated(PSInputNum);
2034ba319b5SDimitry Andric       if (!CurOrigArg->use_empty())
2044ba319b5SDimitry Andric         Info->markPSInputEnabled(PSInputNum);
2054ba319b5SDimitry Andric 
2064ba319b5SDimitry Andric       ++PSInputNum;
2074ba319b5SDimitry Andric     }
2084ba319b5SDimitry Andric 
2097a7e6055SDimitry Andric     CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
2107a7e6055SDimitry Andric                                              /*IsVarArg=*/false);
2114ba319b5SDimitry Andric 
2124ba319b5SDimitry Andric     if (ValEVT.isVector()) {
2134ba319b5SDimitry Andric       EVT ElemVT = ValEVT.getVectorElementType();
2144ba319b5SDimitry Andric       if (!ValEVT.isSimple())
2154ba319b5SDimitry Andric         return false;
2164ba319b5SDimitry Andric       MVT ValVT = ElemVT.getSimpleVT();
2174ba319b5SDimitry Andric       bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full,
2184ba319b5SDimitry Andric                           OrigArg.Flags, CCInfo);
2194ba319b5SDimitry Andric       if (!Res)
2204ba319b5SDimitry Andric         return false;
2214ba319b5SDimitry Andric     } else {
2224ba319b5SDimitry Andric       MVT ValVT = ValEVT.getSimpleVT();
2234ba319b5SDimitry Andric       if (!ValEVT.isSimple())
2244ba319b5SDimitry Andric         return false;
2257a7e6055SDimitry Andric       bool Res =
2262cab237bSDimitry Andric           AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo);
2272cab237bSDimitry Andric 
2282cab237bSDimitry Andric       // Fail if we don't know how to handle this type.
2292cab237bSDimitry Andric       if (Res)
2302cab237bSDimitry Andric         return false;
2317a7e6055SDimitry Andric     }
2324ba319b5SDimitry Andric   }
2337a7e6055SDimitry Andric 
2347a7e6055SDimitry Andric   Function::const_arg_iterator Arg = F.arg_begin();
2352cab237bSDimitry Andric 
2364ba319b5SDimitry Andric   if (F.getCallingConv() == CallingConv::AMDGPU_VS ||
2374ba319b5SDimitry Andric       F.getCallingConv() == CallingConv::AMDGPU_PS) {
2384ba319b5SDimitry Andric     for (unsigned i = 0, OrigArgIdx = 0;
2394ba319b5SDimitry Andric          OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) {
2404ba319b5SDimitry Andric        if (Skipped.test(OrigArgIdx))
2414ba319b5SDimitry Andric           continue;
2424ba319b5SDimitry Andric       CCValAssign &VA = ArgLocs[i++];
2434ba319b5SDimitry Andric       MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx]);
2442cab237bSDimitry Andric       MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
2454ba319b5SDimitry Andric       MIRBuilder.buildCopy(VRegs[OrigArgIdx], VA.getLocReg());
2462cab237bSDimitry Andric     }
2472cab237bSDimitry Andric     return true;
2482cab237bSDimitry Andric   }
2492cab237bSDimitry Andric 
2504ba319b5SDimitry Andric   return false;
2513ca95b02SDimitry Andric }
252