//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUCallLowering.h"
#include "AMDGPU.h"
#include "AMDGPUISelLowering.h"
#include "AMDGPUSubtarget.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/LowLevelTypeImpl.h"

using namespace llvm;

namespace {

struct OutgoingArgHandler : public CallLowering::ValueHandler {
  OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                     MachineInstrBuilder MIB, CCAssignFn *AssignFn)
      : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}

  MachineInstrBuilder MIB;

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    MIB.addUse(PhysReg);
    MIRBuilder.buildCopy(PhysReg, ValVReg);
  }

  bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo,
                 const CallLowering::ArgInfo &Info,
                 CCState &State) override {
    return AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
  }
};

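// Illustrative sketch (not emitted verbatim): for values narrower than 32
// bits, the handler below copies the whole 32-bit register and truncates,
// so an s16 argument assigned to $vgpr0 becomes roughly:
//   %0:_(s32) = COPY $vgpr0
//   %1:_(s16) = G_TRUNC %0
// because 16-bit types are reported as legal in 32-bit registers.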
struct IncomingArgHandler : public CallLowering::ValueHandler {
  uint64_t StackUsed = 0;

  IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                     CCAssignFn *AssignFn)
      : ValueHandler(MIRBuilder, MRI, AssignFn) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    auto &MFI = MIRBuilder.getMF().getFrameInfo();
    int FI = MFI.CreateFixedObject(Size, Offset, true);
    MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
    Register AddrReg = MRI.createGenericVirtualRegister(
        LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32));
    MIRBuilder.buildFrameIndex(AddrReg, FI);
    StackUsed = std::max(StackUsed, Size + Offset);
    return AddrReg;
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    markPhysRegUsed(PhysReg);

    if (VA.getLocVT().getSizeInBits() < 32) {
      // 16-bit types are reported as legal for 32-bit registers. We need to do
      // a 32-bit copy, and truncate to avoid the verifier complaining about it.
      auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);
      MIRBuilder.buildTrunc(ValVReg, Copy);
      return;
    }

    switch (VA.getLocInfo()) {
    case CCValAssign::LocInfo::SExt:
    case CCValAssign::LocInfo::ZExt:
    case CCValAssign::LocInfo::AExt: {
      auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
      MIRBuilder.buildTrunc(ValVReg, Copy);
      break;
    }
    default:
      MIRBuilder.buildCopy(ValVReg, PhysReg);
      break;
    }
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    // FIXME: Get alignment
    auto MMO = MIRBuilder.getMF().getMachineMemOperand(
        MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size,
        1);
    MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
  }

  /// How the physical register gets marked varies between formal
  /// parameters (it's a basic-block live-in) and a call instruction
  /// (it's an implicit-def of the call).
  virtual void markPhysRegUsed(unsigned PhysReg) = 0;

  // FIXME: What is the point of this being a callback?
  bool isArgumentHandler() const override { return true; }
};

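// A sketch of the control flow, for orientation: CallLowering's
// handleAssignments walks the CCValAssign results and, per assigned value,
// invokes either assignValueToReg (register-assigned) or assignValueToAddress
// (stack-assigned) on the handler, so subclasses only customize how a
// physical register is marked as used.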
struct FormalArgHandler : public IncomingArgHandler {
  FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                   CCAssignFn *AssignFn)
      : IncomingArgHandler(MIRBuilder, MRI, AssignFn) {}

  void markPhysRegUsed(unsigned PhysReg) override {
    MIRBuilder.getMBB().addLiveIn(PhysReg);
  }
};

}

AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
    : CallLowering(&TLI) {
}

void AMDGPUCallLowering::splitToValueTypes(
    const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
    const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv,
    SplitArgTy PerformArgSplit) const {
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  LLVMContext &Ctx = OrigArg.Ty->getContext();

  if (OrigArg.Ty->isVoidTy())
    return;

  SmallVector<EVT, 4> SplitVTs;
  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs);

  assert(OrigArg.Regs.size() == SplitVTs.size());

  int SplitIdx = 0;
  for (EVT VT : SplitVTs) {
    unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT);
    Type *Ty = VT.getTypeForEVT(Ctx);

    if (NumParts == 1) {
      // No splitting to do, but we want to replace the original type (e.g. [1 x
      // double] -> double).
      SplitArgs.emplace_back(OrigArg.Regs[SplitIdx], Ty,
                             OrigArg.Flags, OrigArg.IsFixed);

      ++SplitIdx;
      continue;
    }

    LLT LLTy = getLLTForType(*Ty, DL);

    SmallVector<Register, 8> SplitRegs;

    EVT PartVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);
    Type *PartTy = PartVT.getTypeForEVT(Ctx);
    LLT PartLLT = getLLTForType(*PartTy, DL);

    // FIXME: Should we be reporting all of the part registers for a single
    // argument, and let handleAssignments take care of the repacking?
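    // For example (illustrative): a <3 x s16> value is reported by the
    // calling convention as two <2 x s16> part registers, so two part
    // ArgInfos are created below and PerformArgSplit later repacks them
    // into the original value.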
    for (unsigned i = 0; i < NumParts; ++i) {
      Register PartReg = MRI.createGenericVirtualRegister(PartLLT);
      SplitRegs.push_back(PartReg);
      SplitArgs.emplace_back(ArrayRef<Register>(PartReg), PartTy,
                             OrigArg.Flags);
    }

    PerformArgSplit(SplitRegs, LLTy, PartLLT, SplitIdx);

    ++SplitIdx;
  }
}

bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                     const Value *Val,
                                     ArrayRef<Register> VRegs) const {

  MachineFunction &MF = MIRBuilder.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MFI->setIfReturnsVoid(!Val);

  if (!Val) {
    MIRBuilder.buildInstr(AMDGPU::S_ENDPGM).addImm(0);
    return true;
  }

  Register VReg = VRegs[0];

  const Function &F = MF.getFunction();
  auto &DL = F.getParent()->getDataLayout();
  if (!AMDGPU::isShader(F.getCallingConv()))
    return false;

  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
  SmallVector<EVT, 4> SplitVTs;
  SmallVector<uint64_t, 4> Offsets;
  ArgInfo OrigArg{VReg, Val->getType()};
  setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);

  SmallVector<ArgInfo, 8> SplitArgs;
  CCAssignFn *AssignFn = CCAssignFnForReturn(F.getCallingConv(), false);
  for (unsigned i = 0, e = Offsets.size(); i != e; ++i) {
    Type *SplitTy = SplitVTs[i].getTypeForEVT(F.getContext());
    SplitArgs.push_back({VRegs[i], SplitTy, OrigArg.Flags, OrigArg.IsFixed});
  }
  auto RetInstr = MIRBuilder.buildInstrNoInsert(AMDGPU::SI_RETURN_TO_EPILOG);
  OutgoingArgHandler Handler(MIRBuilder, MRI, RetInstr, AssignFn);
  if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
    return false;
  MIRBuilder.insertInstr(RetInstr);

  return true;
}

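// Kernel arguments live in the kernarg segment, whose base pointer is
// preloaded into an SGPR pair. The address of an argument is computed as
// KernargSegmentPtr + Offset in the constant address space. For example
// (illustrative): with two i32 kernel arguments, the second is read from
// byte offset 4 relative to the subtarget's explicit argument base offset.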
Register AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
                                               Type *ParamTy,
                                               uint64_t Offset) const {

  MachineFunction &MF = MIRBuilder.getMF();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  LLT PtrType = getLLTForType(*PtrTy, DL);
  Register DstReg = MRI.createGenericVirtualRegister(PtrType);
  Register KernArgSegmentPtr =
      MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);

  Register OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
  MIRBuilder.buildConstant(OffsetReg, Offset);

  MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);

  return DstReg;
}

void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
                                        Type *ParamTy, uint64_t Offset,
                                        unsigned Align,
                                        Register DstReg) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
  Register PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);

  MachineMemOperand *MMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
                                       MachineMemOperand::MODereferenceable |
                                       MachineMemOperand::MOInvariant,
                                       TypeSize, Align);

  MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
}

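// Illustrative user-SGPR layout for a kernel that requests everything handled
// below (the actual registers depend on which inputs the kernel uses;
// allocation follows the order of the checks in this function):
//   s[0:3]   private segment buffer
//   s[4:5]   dispatch ptr
//   s[6:7]   queue ptr
//   s[8:9]   kernarg segment ptr
//   s[10:11] dispatch id
//   s[12:13] flat scratch init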
// Allocate special inputs passed in user SGPRs.
static void allocateHSAUserSGPRs(CCState &CCInfo,
                                 MachineIRBuilder &MIRBuilder,
                                 MachineFunction &MF,
                                 const SIRegisterInfo &TRI,
                                 SIMachineFunctionInfo &Info) {
  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
  if (Info.hasPrivateSegmentBuffer()) {
    unsigned PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
    CCInfo.AllocateReg(PrivateSegmentBufferReg);
  }

  if (Info.hasDispatchPtr()) {
    unsigned DispatchPtrReg = Info.addDispatchPtr(TRI);
    MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchPtrReg);
  }

  if (Info.hasQueuePtr()) {
    unsigned QueuePtrReg = Info.addQueuePtr(TRI);
    MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(QueuePtrReg);
  }

  if (Info.hasKernargSegmentPtr()) {
    MachineRegisterInfo &MRI = MF.getRegInfo();
    Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
    const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
    Register VReg = MRI.createGenericVirtualRegister(P4);
    MRI.addLiveIn(InputPtrReg, VReg);
    MIRBuilder.getMBB().addLiveIn(InputPtrReg);
    MIRBuilder.buildCopy(VReg, InputPtrReg);
    CCInfo.AllocateReg(InputPtrReg);
  }

  if (Info.hasDispatchID()) {
    unsigned DispatchIDReg = Info.addDispatchID(TRI);
    MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchIDReg);
  }

  if (Info.hasFlatScratchInit()) {
    unsigned FlatScratchInitReg = Info.addFlatScratchInit(TRI);
    MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(FlatScratchInitReg);
  }

  // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
  // these from the dispatch pointer.
}

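// Kernel arguments are not passed in registers; each one is loaded from its
// offset in the kernarg segment. For example (illustrative IR), given
//   define amdgpu_kernel void @k(i32 %a, <2 x i16> %b)
// %a is loaded from kernarg offset 0 and %b from offset 4 (plus the
// subtarget's explicit argument base offset), both as dereferenceable
// invariant loads from the constant address space.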
bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
    MachineIRBuilder &MIRBuilder, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();

  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

  allocateHSAUserSGPRs(CCInfo, MIRBuilder, MF, *TRI, *Info);

  unsigned i = 0;
  const unsigned KernArgBaseAlign = 16;
  const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
  uint64_t ExplicitArgOffset = 0;

  // TODO: Align down to dword alignment and extract bits for extending loads.
  for (auto &Arg : F.args()) {
    Type *ArgTy = Arg.getType();
    unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
    if (AllocSize == 0)
      continue;

    unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);

    uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
    ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;

    ArrayRef<Register> OrigArgRegs = VRegs[i];
    Register ArgReg =
        OrigArgRegs.size() == 1
            ? OrigArgRegs[0]
            : MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL));
    unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
    ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
    lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, ArgReg);
    if (OrigArgRegs.size() > 1)
      unpackRegs(OrigArgRegs, ArgReg, ArgTy, MIRBuilder);
    ++i;
  }

  TLI.allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
  TLI.allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
  return true;
}

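// Reassemble a value from the part registers produced by splitToValueTypes.
// Example (illustrative MIR) for a <3 x s16> value split into two <2 x s16>
// parts:
//   %4:_(<4 x s16>) = G_CONCAT_VECTORS %2:_(<2 x s16>), %3:_(<2 x s16>)
//   %5:_(<3 x s16>) = G_EXTRACT %4:_(<4 x s16>), 0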
static void packSplitRegsToOrigType(MachineIRBuilder &MIRBuilder,
                                    ArrayRef<Register> OrigRegs,
                                    ArrayRef<Register> Regs,
                                    LLT LLTy,
                                    LLT PartLLT) {
  if (!LLTy.isVector() && !PartLLT.isVector()) {
    MIRBuilder.buildMerge(OrigRegs[0], Regs);
    return;
  }

  if (LLTy.isVector() && PartLLT.isVector()) {
    assert(LLTy.getElementType() == PartLLT.getElementType());

    int DstElts = LLTy.getNumElements();
    int PartElts = PartLLT.getNumElements();
    if (DstElts % PartElts == 0)
      MIRBuilder.buildConcatVectors(OrigRegs[0], Regs);
    else {
      // Deal with v3s16 split into v2s16
      assert(PartElts == 2 && DstElts % 2 != 0);
      int RoundedElts = PartElts * ((DstElts + PartElts - 1) / PartElts);

      LLT RoundedDestTy = LLT::vector(RoundedElts, PartLLT.getElementType());
      auto RoundedConcat = MIRBuilder.buildConcatVectors(RoundedDestTy, Regs);
      MIRBuilder.buildExtract(OrigRegs[0], RoundedConcat, 0);
    }

    return;
  }

  assert(LLTy.isVector() && !PartLLT.isVector());

  LLT DstEltTy = LLTy.getElementType();
  if (DstEltTy == PartLLT) {
    // Vector was trivially scalarized.
    MIRBuilder.buildBuildVector(OrigRegs[0], Regs);
  } else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) {
    // Deal with vector with 64-bit elements decomposed to 32-bit
    // registers. Need to create intermediate 64-bit elements.
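    // For example (illustrative): a <2 x s64> value arriving as four s32
    // parts is rebuilt as two G_MERGE_VALUES (s32, s32 -> s64) feeding a
    // G_BUILD_VECTOR (s64, s64 -> <2 x s64>).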
    SmallVector<Register, 8> EltMerges;
    int PartsPerElt = DstEltTy.getSizeInBits() / PartLLT.getSizeInBits();

    assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0);

    for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) {
      auto Merge = MIRBuilder.buildMerge(DstEltTy,
                                         Regs.take_front(PartsPerElt));
      EltMerges.push_back(Merge.getReg(0));
      Regs = Regs.drop_front(PartsPerElt);
    }

    MIRBuilder.buildBuildVector(OrigRegs[0], EltMerges);
  } else {
    // Vector was split, and elements promoted to a wider type.
    LLT BVType = LLT::vector(LLTy.getNumElements(), PartLLT);
    auto BV = MIRBuilder.buildBuildVector(BVType, Regs);
    MIRBuilder.buildTrunc(OrigRegs[0], BV);
  }
}

bool AMDGPUCallLowering::lowerFormalArguments(
    MachineIRBuilder &MIRBuilder, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  CallingConv::ID CC = F.getCallingConv();

  // The infrastructure for normal calling convention lowering is essentially
  // useless for kernels. We want to avoid any kind of legalization or argument
  // splitting.
  if (CC == CallingConv::AMDGPU_KERNEL)
    return lowerFormalArgumentsKernel(MIRBuilder, F, VRegs);

  // AMDGPU_GS and AMDGPU_HS are not supported yet.
  if (CC == CallingConv::AMDGPU_GS || CC == CallingConv::AMDGPU_HS)
    return false;

  const bool IsShader = AMDGPU::isShader(CC);
  const bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CC);

  MachineFunction &MF = MIRBuilder.getMF();
  MachineBasicBlock &MBB = MIRBuilder.getMBB();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());

  if (Info->hasImplicitBufferPtr()) {
    Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
    MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(ImplicitBufferPtrReg);
  }

  SmallVector<ArgInfo, 32> SplitArgs;
  unsigned Idx = 0;
  unsigned PSInputNum = 0;

  for (auto &Arg : F.args()) {
    if (DL.getTypeStoreSize(Arg.getType()) == 0)
      continue;

    const bool InReg = Arg.hasAttribute(Attribute::InReg);

    // SGPR arguments to functions not implemented.
    if (!IsShader && InReg)
      return false;

    // TODO: Handle sret.
    if (Arg.hasAttribute(Attribute::StructRet) ||
        Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::Nest))
      return false;

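    // For pixel shaders, inputs occupy the first 16 PS input slots. An input
    // that is never used (and was not already allocated) is skipped entirely;
    // its IR values are materialized as G_IMPLICIT_DEF below rather than
    // being assigned a register.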
    if (CC == CallingConv::AMDGPU_PS && !InReg && PSInputNum <= 15) {
      const bool ArgUsed = !Arg.use_empty();
      bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum);

      if (!SkipArg) {
        Info->markPSInputAllocated(PSInputNum);
        if (ArgUsed)
          Info->markPSInputEnabled(PSInputNum);
      }

      ++PSInputNum;

      if (SkipArg) {
        for (int I = 0, E = VRegs[Idx].size(); I != E; ++I)
          MIRBuilder.buildUndef(VRegs[Idx][I]);

        ++Idx;
        continue;
      }
    }

    ArgInfo OrigArg(VRegs[Idx], Arg.getType());
    setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F);

    splitToValueTypes(
        OrigArg, SplitArgs, DL, MRI, CC,
        // FIXME: We should probably be passing multiple registers to
        // handleAssignments to do this
        [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT, int VTSplitIdx) {
          packSplitRegsToOrigType(MIRBuilder, VRegs[Idx][VTSplitIdx], Regs,
                                  LLTy, PartLLT);
        });

    ++Idx;
  }

  // At least one interpolation mode must be enabled or else the GPU will
  // hang.
  //
  // Check PSInputAddr instead of PSInputEnable. The idea is that if the user
  // set PSInputAddr, the user wants to enable some bits after the compilation
  // based on run-time states. Since we can't know what the final PSInputEna
  // will look like, we shouldn't do anything here and the user should take
  // responsibility for the correct programming.
  //
  // Otherwise, the following restrictions apply:
  // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
  // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
  //   enabled too.
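  // For example (illustrative): if neither PERSP_* nor LINEAR_* ends up
  // enabled, VGPR0/VGPR1 are reserved and PERSP_SAMPLE (input 0) is
  // force-enabled below to satisfy the hardware requirement.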
  if (CC == CallingConv::AMDGPU_PS) {
    if ((Info->getPSInputAddr() & 0x7F) == 0 ||
        ((Info->getPSInputAddr() & 0xF) == 0 &&
         Info->isPSInputAllocated(11))) {
      CCInfo.AllocateReg(AMDGPU::VGPR0);
      CCInfo.AllocateReg(AMDGPU::VGPR1);
      Info->markPSInputAllocated(0);
      Info->markPSInputEnabled(0);
    }

    if (Subtarget.isAmdPalOS()) {
      // For isAmdPalOS, the user does not enable some bits after compilation
      // based on run-time states; the register values being generated here are
      // the final ones set in hardware. Therefore we need to apply the
      // workaround to PSInputAddr and PSInputEnable together. (The case where
      // a bit is set in PSInputAddr but not PSInputEnable is where the frontend
      // set up an input arg for a particular interpolation mode, but nothing
      // uses that input arg. Really we should have an earlier pass that removes
      // such an arg.)
      unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
      if ((PsInputBits & 0x7F) == 0 ||
          ((PsInputBits & 0xF) == 0 &&
           (PsInputBits >> 11 & 1)))
        Info->markPSInputEnabled(
            countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined));
    }
  }

  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC, F.isVarArg());

  if (!MBB.empty())
    MIRBuilder.setInstr(*MBB.begin());

  FormalArgHandler Handler(MIRBuilder, MRI, AssignFn);
  if (!handleAssignments(CCInfo, ArgLocs, MIRBuilder, SplitArgs, Handler))
    return false;

  if (!IsEntryFunc) {
    // Special inputs come after user arguments.
    TLI.allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info);
  }

  // Start adding system SGPRs.
  if (IsEntryFunc) {
    TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsShader);
  } else {
    CCInfo.AllocateReg(Info->getScratchRSrcReg());
    CCInfo.AllocateReg(Info->getScratchWaveOffsetReg());
    CCInfo.AllocateReg(Info->getFrameOffsetReg());
    TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
  }

  // Move back to the end of the basic block.
  MIRBuilder.setMBB(MBB);

  return true;
}