1d88c1a5aSDimitry Andric //===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
23ca95b02SDimitry Andric //
33ca95b02SDimitry Andric // The LLVM Compiler Infrastructure
43ca95b02SDimitry Andric //
53ca95b02SDimitry Andric // This file is distributed under the University of Illinois Open Source
63ca95b02SDimitry Andric // License. See LICENSE.TXT for details.
73ca95b02SDimitry Andric //
83ca95b02SDimitry Andric //===----------------------------------------------------------------------===//
93ca95b02SDimitry Andric ///
103ca95b02SDimitry Andric /// \file
113ca95b02SDimitry Andric /// This file implements the lowering of LLVM calls to machine code calls for
123ca95b02SDimitry Andric /// GlobalISel.
133ca95b02SDimitry Andric ///
143ca95b02SDimitry Andric //===----------------------------------------------------------------------===//
153ca95b02SDimitry Andric
163ca95b02SDimitry Andric #include "AMDGPUCallLowering.h"
177a7e6055SDimitry Andric #include "AMDGPU.h"
183ca95b02SDimitry Andric #include "AMDGPUISelLowering.h"
197a7e6055SDimitry Andric #include "AMDGPUSubtarget.h"
207a7e6055SDimitry Andric #include "SIISelLowering.h"
217a7e6055SDimitry Andric #include "SIMachineFunctionInfo.h"
22db17bf38SDimitry Andric #include "SIRegisterInfo.h"
234ba319b5SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
247a7e6055SDimitry Andric #include "llvm/CodeGen/CallingConvLower.h"
253ca95b02SDimitry Andric #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
263ca95b02SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
273ca95b02SDimitry Andric
283ca95b02SDimitry Andric using namespace llvm;
293ca95b02SDimitry Andric
AMDGPUCallLowering(const AMDGPUTargetLowering & TLI)303ca95b02SDimitry Andric AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
31*b5893f02SDimitry Andric : CallLowering(&TLI) {
323ca95b02SDimitry Andric }
333ca95b02SDimitry Andric
lowerReturn(MachineIRBuilder & MIRBuilder,const Value * Val,ArrayRef<unsigned> VRegs) const343ca95b02SDimitry Andric bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
35*b5893f02SDimitry Andric const Value *Val,
36*b5893f02SDimitry Andric ArrayRef<unsigned> VRegs) const {
374ba319b5SDimitry Andric // FIXME: Add support for non-void returns.
384ba319b5SDimitry Andric if (Val)
394ba319b5SDimitry Andric return false;
404ba319b5SDimitry Andric
417a7e6055SDimitry Andric MIRBuilder.buildInstr(AMDGPU::S_ENDPGM);
423ca95b02SDimitry Andric return true;
433ca95b02SDimitry Andric }
443ca95b02SDimitry Andric
lowerParameterPtr(MachineIRBuilder & MIRBuilder,Type * ParamTy,uint64_t Offset) const457a7e6055SDimitry Andric unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
467a7e6055SDimitry Andric Type *ParamTy,
474ba319b5SDimitry Andric uint64_t Offset) const {
487a7e6055SDimitry Andric
497a7e6055SDimitry Andric MachineFunction &MF = MIRBuilder.getMF();
502cab237bSDimitry Andric const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
517a7e6055SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo();
522cab237bSDimitry Andric const Function &F = MF.getFunction();
537a7e6055SDimitry Andric const DataLayout &DL = F.getParent()->getDataLayout();
54*b5893f02SDimitry Andric PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
557a7e6055SDimitry Andric LLT PtrType = getLLTForType(*PtrTy, DL);
567a7e6055SDimitry Andric unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
577a7e6055SDimitry Andric unsigned KernArgSegmentPtr =
582cab237bSDimitry Andric MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
597a7e6055SDimitry Andric unsigned KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);
607a7e6055SDimitry Andric
617a7e6055SDimitry Andric unsigned OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
627a7e6055SDimitry Andric MIRBuilder.buildConstant(OffsetReg, Offset);
637a7e6055SDimitry Andric
647a7e6055SDimitry Andric MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);
657a7e6055SDimitry Andric
667a7e6055SDimitry Andric return DstReg;
677a7e6055SDimitry Andric }
687a7e6055SDimitry Andric
lowerParameter(MachineIRBuilder & MIRBuilder,Type * ParamTy,uint64_t Offset,unsigned Align,unsigned DstReg) const697a7e6055SDimitry Andric void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
704ba319b5SDimitry Andric Type *ParamTy, uint64_t Offset,
714ba319b5SDimitry Andric unsigned Align,
727a7e6055SDimitry Andric unsigned DstReg) const {
737a7e6055SDimitry Andric MachineFunction &MF = MIRBuilder.getMF();
742cab237bSDimitry Andric const Function &F = MF.getFunction();
757a7e6055SDimitry Andric const DataLayout &DL = F.getParent()->getDataLayout();
76*b5893f02SDimitry Andric PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
777a7e6055SDimitry Andric MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
787a7e6055SDimitry Andric unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
797a7e6055SDimitry Andric unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);
807a7e6055SDimitry Andric
817a7e6055SDimitry Andric MachineMemOperand *MMO =
827a7e6055SDimitry Andric MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
837a7e6055SDimitry Andric MachineMemOperand::MONonTemporal |
847a7e6055SDimitry Andric MachineMemOperand::MOInvariant,
857a7e6055SDimitry Andric TypeSize, Align);
867a7e6055SDimitry Andric
877a7e6055SDimitry Andric MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
887a7e6055SDimitry Andric }
897a7e6055SDimitry Andric
lowerFormalArguments(MachineIRBuilder & MIRBuilder,const Function & F,ArrayRef<unsigned> VRegs) const90d88c1a5aSDimitry Andric bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
91d88c1a5aSDimitry Andric const Function &F,
92d88c1a5aSDimitry Andric ArrayRef<unsigned> VRegs) const {
934ba319b5SDimitry Andric // AMDGPU_GS and AMDGP_HS are not supported yet.
944ba319b5SDimitry Andric if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
954ba319b5SDimitry Andric F.getCallingConv() == CallingConv::AMDGPU_HS)
964ba319b5SDimitry Andric return false;
977a7e6055SDimitry Andric
987a7e6055SDimitry Andric MachineFunction &MF = MIRBuilder.getMF();
994ba319b5SDimitry Andric const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
1007a7e6055SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo();
1017a7e6055SDimitry Andric SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1024ba319b5SDimitry Andric const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
1037a7e6055SDimitry Andric const DataLayout &DL = F.getParent()->getDataLayout();
1047a7e6055SDimitry Andric
1057a7e6055SDimitry Andric SmallVector<CCValAssign, 16> ArgLocs;
1067a7e6055SDimitry Andric CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
1077a7e6055SDimitry Andric
1087a7e6055SDimitry Andric // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
1097a7e6055SDimitry Andric if (Info->hasPrivateSegmentBuffer()) {
1107a7e6055SDimitry Andric unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
1117a7e6055SDimitry Andric MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
1127a7e6055SDimitry Andric CCInfo.AllocateReg(PrivateSegmentBufferReg);
1137a7e6055SDimitry Andric }
1147a7e6055SDimitry Andric
1157a7e6055SDimitry Andric if (Info->hasDispatchPtr()) {
1167a7e6055SDimitry Andric unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
1177a7e6055SDimitry Andric // FIXME: Need to add reg as live-in
1187a7e6055SDimitry Andric CCInfo.AllocateReg(DispatchPtrReg);
1197a7e6055SDimitry Andric }
1207a7e6055SDimitry Andric
1217a7e6055SDimitry Andric if (Info->hasQueuePtr()) {
1227a7e6055SDimitry Andric unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
1237a7e6055SDimitry Andric // FIXME: Need to add reg as live-in
1247a7e6055SDimitry Andric CCInfo.AllocateReg(QueuePtrReg);
1257a7e6055SDimitry Andric }
1267a7e6055SDimitry Andric
1277a7e6055SDimitry Andric if (Info->hasKernargSegmentPtr()) {
1287a7e6055SDimitry Andric unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
1294ba319b5SDimitry Andric const LLT P2 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
1307a7e6055SDimitry Andric unsigned VReg = MRI.createGenericVirtualRegister(P2);
1317a7e6055SDimitry Andric MRI.addLiveIn(InputPtrReg, VReg);
1327a7e6055SDimitry Andric MIRBuilder.getMBB().addLiveIn(InputPtrReg);
1337a7e6055SDimitry Andric MIRBuilder.buildCopy(VReg, InputPtrReg);
1347a7e6055SDimitry Andric CCInfo.AllocateReg(InputPtrReg);
1357a7e6055SDimitry Andric }
1367a7e6055SDimitry Andric
1377a7e6055SDimitry Andric if (Info->hasDispatchID()) {
1387a7e6055SDimitry Andric unsigned DispatchIDReg = Info->addDispatchID(*TRI);
1397a7e6055SDimitry Andric // FIXME: Need to add reg as live-in
1407a7e6055SDimitry Andric CCInfo.AllocateReg(DispatchIDReg);
1417a7e6055SDimitry Andric }
1427a7e6055SDimitry Andric
1437a7e6055SDimitry Andric if (Info->hasFlatScratchInit()) {
1447a7e6055SDimitry Andric unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
1457a7e6055SDimitry Andric // FIXME: Need to add reg as live-in
1467a7e6055SDimitry Andric CCInfo.AllocateReg(FlatScratchInitReg);
1477a7e6055SDimitry Andric }
1487a7e6055SDimitry Andric
1494ba319b5SDimitry Andric // The infrastructure for normal calling convention lowering is essentially
1504ba319b5SDimitry Andric // useless for kernels. We want to avoid any kind of legalization or argument
1514ba319b5SDimitry Andric // splitting.
1524ba319b5SDimitry Andric if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) {
1534ba319b5SDimitry Andric unsigned i = 0;
1544ba319b5SDimitry Andric const unsigned KernArgBaseAlign = 16;
1554ba319b5SDimitry Andric const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
1564ba319b5SDimitry Andric uint64_t ExplicitArgOffset = 0;
1574ba319b5SDimitry Andric
1584ba319b5SDimitry Andric // TODO: Align down to dword alignment and extract bits for extending loads.
1594ba319b5SDimitry Andric for (auto &Arg : F.args()) {
1604ba319b5SDimitry Andric Type *ArgTy = Arg.getType();
1614ba319b5SDimitry Andric unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
1624ba319b5SDimitry Andric if (AllocSize == 0)
1634ba319b5SDimitry Andric continue;
1644ba319b5SDimitry Andric
1654ba319b5SDimitry Andric unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);
1664ba319b5SDimitry Andric
1674ba319b5SDimitry Andric uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
1684ba319b5SDimitry Andric ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
1694ba319b5SDimitry Andric
1704ba319b5SDimitry Andric unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
1714ba319b5SDimitry Andric ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
1724ba319b5SDimitry Andric lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, VRegs[i]);
1734ba319b5SDimitry Andric ++i;
1744ba319b5SDimitry Andric }
1754ba319b5SDimitry Andric
1764ba319b5SDimitry Andric return true;
1774ba319b5SDimitry Andric }
1784ba319b5SDimitry Andric
1797a7e6055SDimitry Andric unsigned NumArgs = F.arg_size();
1807a7e6055SDimitry Andric Function::const_arg_iterator CurOrigArg = F.arg_begin();
1817a7e6055SDimitry Andric const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
1824ba319b5SDimitry Andric unsigned PSInputNum = 0;
1834ba319b5SDimitry Andric BitVector Skipped(NumArgs);
1847a7e6055SDimitry Andric for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
1852cab237bSDimitry Andric EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType());
1862cab237bSDimitry Andric
1872cab237bSDimitry Andric // We can only hanlde simple value types at the moment.
1887a7e6055SDimitry Andric ISD::ArgFlagsTy Flags;
1892cab237bSDimitry Andric ArgInfo OrigArg{VRegs[i], CurOrigArg->getType()};
1902cab237bSDimitry Andric setArgFlags(OrigArg, i + 1, DL, F);
1917a7e6055SDimitry Andric Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));
1924ba319b5SDimitry Andric
1934ba319b5SDimitry Andric if (F.getCallingConv() == CallingConv::AMDGPU_PS &&
1944ba319b5SDimitry Andric !OrigArg.Flags.isInReg() && !OrigArg.Flags.isByVal() &&
1954ba319b5SDimitry Andric PSInputNum <= 15) {
1964ba319b5SDimitry Andric if (CurOrigArg->use_empty() && !Info->isPSInputAllocated(PSInputNum)) {
1974ba319b5SDimitry Andric Skipped.set(i);
1984ba319b5SDimitry Andric ++PSInputNum;
1994ba319b5SDimitry Andric continue;
2004ba319b5SDimitry Andric }
2014ba319b5SDimitry Andric
2024ba319b5SDimitry Andric Info->markPSInputAllocated(PSInputNum);
2034ba319b5SDimitry Andric if (!CurOrigArg->use_empty())
2044ba319b5SDimitry Andric Info->markPSInputEnabled(PSInputNum);
2054ba319b5SDimitry Andric
2064ba319b5SDimitry Andric ++PSInputNum;
2074ba319b5SDimitry Andric }
2084ba319b5SDimitry Andric
2097a7e6055SDimitry Andric CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
2107a7e6055SDimitry Andric /*IsVarArg=*/false);
2114ba319b5SDimitry Andric
2124ba319b5SDimitry Andric if (ValEVT.isVector()) {
2134ba319b5SDimitry Andric EVT ElemVT = ValEVT.getVectorElementType();
2144ba319b5SDimitry Andric if (!ValEVT.isSimple())
2154ba319b5SDimitry Andric return false;
2164ba319b5SDimitry Andric MVT ValVT = ElemVT.getSimpleVT();
2174ba319b5SDimitry Andric bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full,
2184ba319b5SDimitry Andric OrigArg.Flags, CCInfo);
2194ba319b5SDimitry Andric if (!Res)
2204ba319b5SDimitry Andric return false;
2214ba319b5SDimitry Andric } else {
2224ba319b5SDimitry Andric MVT ValVT = ValEVT.getSimpleVT();
2234ba319b5SDimitry Andric if (!ValEVT.isSimple())
2244ba319b5SDimitry Andric return false;
2257a7e6055SDimitry Andric bool Res =
2262cab237bSDimitry Andric AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo);
2272cab237bSDimitry Andric
2282cab237bSDimitry Andric // Fail if we don't know how to handle this type.
2292cab237bSDimitry Andric if (Res)
2302cab237bSDimitry Andric return false;
2317a7e6055SDimitry Andric }
2324ba319b5SDimitry Andric }
2337a7e6055SDimitry Andric
2347a7e6055SDimitry Andric Function::const_arg_iterator Arg = F.arg_begin();
2352cab237bSDimitry Andric
2364ba319b5SDimitry Andric if (F.getCallingConv() == CallingConv::AMDGPU_VS ||
2374ba319b5SDimitry Andric F.getCallingConv() == CallingConv::AMDGPU_PS) {
2384ba319b5SDimitry Andric for (unsigned i = 0, OrigArgIdx = 0;
2394ba319b5SDimitry Andric OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) {
2404ba319b5SDimitry Andric if (Skipped.test(OrigArgIdx))
2414ba319b5SDimitry Andric continue;
2424ba319b5SDimitry Andric CCValAssign &VA = ArgLocs[i++];
2434ba319b5SDimitry Andric MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx]);
2442cab237bSDimitry Andric MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
2454ba319b5SDimitry Andric MIRBuilder.buildCopy(VRegs[OrigArgIdx], VA.getLocReg());
2462cab237bSDimitry Andric }
2472cab237bSDimitry Andric return true;
2482cab237bSDimitry Andric }
2492cab237bSDimitry Andric
2504ba319b5SDimitry Andric return false;
2513ca95b02SDimitry Andric }
252