//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUCallLowering.h"
#include "AMDGPU.h"
#include "AMDGPUISelLowering.h"
#include "AMDGPUSubtarget.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/LowLevelTypeImpl.h"

using namespace llvm;

namespace {

struct OutgoingValueHandler : public CallLowering::ValueHandler {
  OutgoingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                       MachineInstrBuilder MIB, CCAssignFn *AssignFn)
      : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}

  MachineInstrBuilder MIB;

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    Register ExtReg;
    if (VA.getLocVT().getSizeInBits() < 32) {
      // 16-bit types are reported as legal for 32-bit registers. We need to
      // extend and do a 32-bit copy to avoid the verifier complaining about it.
      ExtReg = MIRBuilder.buildAnyExt(LLT::scalar(32), ValVReg).getReg(0);
    } else
      ExtReg = extendRegister(ValVReg, VA);

    MIRBuilder.buildCopy(PhysReg, ExtReg);
    MIB.addUse(PhysReg, RegState::Implicit);
  }

  bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo,
                 const CallLowering::ArgInfo &Info,
                 CCState &State) override {
    return AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
  }
};

struct IncomingArgHandler : public CallLowering::ValueHandler {
  uint64_t StackUsed = 0;

  IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                     CCAssignFn *AssignFn)
      : ValueHandler(MIRBuilder, MRI, AssignFn) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    auto &MFI = MIRBuilder.getMF().getFrameInfo();
    int FI = MFI.CreateFixedObject(Size, Offset, true);
    MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
    Register AddrReg = MRI.createGenericVirtualRegister(
        LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32));
    MIRBuilder.buildFrameIndex(AddrReg, FI);
    StackUsed = std::max(StackUsed, Size + Offset);
    return AddrReg;
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    markPhysRegUsed(PhysReg);

    if (VA.getLocVT().getSizeInBits() < 32) {
      // 16-bit types are reported as legal for 32-bit registers. We need to do
      // a 32-bit copy, and truncate to avoid the verifier complaining about it.
      auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);
      MIRBuilder.buildTrunc(ValVReg, Copy);
      return;
    }

    switch (VA.getLocInfo()) {
    case CCValAssign::LocInfo::SExt:
    case CCValAssign::LocInfo::ZExt:
    case CCValAssign::LocInfo::AExt: {
      auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
      MIRBuilder.buildTrunc(ValVReg, Copy);
      break;
    }
    default:
      MIRBuilder.buildCopy(ValVReg, PhysReg);
      break;
    }
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    // FIXME: Get alignment
    auto MMO = MIRBuilder.getMF().getMachineMemOperand(
        MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size,
        1);
    MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
  }

  /// How the physical register gets marked varies between formal
  /// parameters (it's a basic-block live-in), and a call instruction
  /// (it's an implicit-def of the call instruction).
  virtual void markPhysRegUsed(unsigned PhysReg) = 0;

  // FIXME: What is the point of this being a callback?
  bool isIncomingArgumentHandler() const override { return true; }
};

struct FormalArgHandler : public IncomingArgHandler {
  FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                   CCAssignFn *AssignFn)
      : IncomingArgHandler(MIRBuilder, MRI, AssignFn) {}

  void markPhysRegUsed(unsigned PhysReg) override {
    MIRBuilder.getMBB().addLiveIn(PhysReg);
  }
};

} // end anonymous namespace

AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
    : CallLowering(&TLI) {}

void AMDGPUCallLowering::splitToValueTypes(
    const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
    const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv,
    SplitArgTy PerformArgSplit) const {
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  LLVMContext &Ctx = OrigArg.Ty->getContext();

  if (OrigArg.Ty->isVoidTy())
    return;

  SmallVector<EVT, 4> SplitVTs;
  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs);

  assert(OrigArg.Regs.size() == SplitVTs.size());

  int SplitIdx = 0;
  for (EVT VT : SplitVTs) {
    unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT);
    Type *Ty = VT.getTypeForEVT(Ctx);

    if (NumParts == 1) {
      // No splitting to do, but we want to replace the original type (e.g.
      // [1 x double] -> double).
      SplitArgs.emplace_back(OrigArg.Regs[SplitIdx], Ty,
                             OrigArg.Flags, OrigArg.IsFixed);

      ++SplitIdx;
      continue;
    }

    LLT LLTy = getLLTForType(*Ty, DL);

    SmallVector<Register, 8> SplitRegs;

    EVT PartVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);
    Type *PartTy = PartVT.getTypeForEVT(Ctx);
    LLT PartLLT = getLLTForType(*PartTy, DL);

    // FIXME: Should we be reporting all of the part registers for a single
    // argument, and let handleAssignments take care of the repacking?
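    // Create a fresh virtual register for each legalized part; the
    // PerformArgSplit callback invoked below is responsible for repacking
    // these part registers into the original value's register(s).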
    for (unsigned i = 0; i < NumParts; ++i) {
      Register PartReg = MRI.createGenericVirtualRegister(PartLLT);
      SplitRegs.push_back(PartReg);
      SplitArgs.emplace_back(ArrayRef<Register>(PartReg), PartTy,
                             OrigArg.Flags);
    }

    PerformArgSplit(SplitRegs, LLTy, PartLLT, SplitIdx);

    ++SplitIdx;
  }
}

// Get the appropriate type to make \p OrigTy \p Factor times bigger.
static LLT getMultipleType(LLT OrigTy, int Factor) {
  if (OrigTy.isVector()) {
    return LLT::vector(OrigTy.getNumElements() * Factor,
                       OrigTy.getElementType());
  }

  return LLT::scalar(OrigTy.getSizeInBits() * Factor);
}

// TODO: Move to generic code
static void unpackRegsToOrigType(MachineIRBuilder &MIRBuilder,
                                 ArrayRef<Register> DstRegs,
                                 Register SrcReg,
                                 LLT SrcTy,
                                 LLT PartTy) {
  assert(DstRegs.size() > 1 && "Nothing to unpack");

  MachineFunction &MF = MIRBuilder.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned PartSize = PartTy.getSizeInBits();

  if (SrcTy.isVector() && !PartTy.isVector() &&
      PartSize > SrcTy.getElementType().getSizeInBits()) {
    // Vector was scalarized, and the elements extended.
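    // For example (illustrative values): a <2 x s16> source passed as two
    // s32 parts is unmerged into its two s16 elements, each of which is then
    // any-extended into its 32-bit destination register.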
    auto UnmergeToEltTy = MIRBuilder.buildUnmerge(SrcTy.getElementType(),
                                                  SrcReg);
    for (int i = 0, e = DstRegs.size(); i != e; ++i)
      MIRBuilder.buildAnyExt(DstRegs[i], UnmergeToEltTy.getReg(i));
    return;
  }

  if (SrcSize % PartSize == 0) {
    MIRBuilder.buildUnmerge(DstRegs, SrcReg);
    return;
  }

  const int NumRoundedParts = (SrcSize + PartSize - 1) / PartSize;

  LLT BigTy = getMultipleType(PartTy, NumRoundedParts);
  auto ImpDef = MIRBuilder.buildUndef(BigTy);

  Register BigReg = MRI.createGenericVirtualRegister(BigTy);
  MIRBuilder.buildInsert(BigReg, ImpDef.getReg(0), SrcReg, 0);

  int64_t Offset = 0;
  for (unsigned i = 0, e = DstRegs.size(); i != e; ++i, Offset += PartSize)
    MIRBuilder.buildExtract(DstRegs[i], BigReg, Offset);
}

/// Lower the return value for the already existing \p Ret. This assumes that
/// \p MIRBuilder's insertion point is correct.
bool AMDGPUCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
                                        const Value *Val,
                                        ArrayRef<Register> VRegs,
                                        MachineInstrBuilder &Ret) const {
  if (!Val)
    return true;

  auto &MF = MIRBuilder.getMF();
  const auto &F = MF.getFunction();
  const DataLayout &DL = MF.getDataLayout();

  CallingConv::ID CC = F.getCallingConv();
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  ArgInfo OrigRetInfo(VRegs, Val->getType());
  setArgFlags(OrigRetInfo, AttributeList::ReturnIndex, DL, F);
  SmallVector<ArgInfo, 4> SplitRetInfos;

  splitToValueTypes(
      OrigRetInfo, SplitRetInfos, DL, MRI, CC,
      [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT, int VTSplitIdx) {
        unpackRegsToOrigType(MIRBuilder, Regs, VRegs[VTSplitIdx], LLTy,
                             PartLLT);
      });

  CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg());

  OutgoingValueHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret, AssignFn);
  return handleAssignments(MIRBuilder, SplitRetInfos, RetHandler);
}

bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                     const Value *Val,
                                     ArrayRef<Register> VRegs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MFI->setIfReturnsVoid(!Val);

  assert(!Val == VRegs.empty() && "Return value without a vreg");

  CallingConv::ID CC = MIRBuilder.getMF().getFunction().getCallingConv();
  const bool IsShader = AMDGPU::isShader(CC);
  const bool IsWaveEnd = (IsShader && MFI->returnsVoid()) ||
                         AMDGPU::isKernel(CC);
  if (IsWaveEnd) {
    MIRBuilder.buildInstr(AMDGPU::S_ENDPGM)
        .addImm(0);
    return true;
  }

  const auto &ST = MIRBuilder.getMF().getSubtarget<GCNSubtarget>();

  unsigned ReturnOpc =
      IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::S_SETPC_B64_return;

  auto Ret = MIRBuilder.buildInstrNoInsert(ReturnOpc);
  Register ReturnAddrVReg;
  if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
    ReturnAddrVReg = MRI.createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass);
    Ret.addUse(ReturnAddrVReg);
  }

  if (!lowerReturnVal(MIRBuilder, Val, VRegs, Ret))
    return false;

  if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
    const SIRegisterInfo *TRI = ST.getRegisterInfo();
    Register LiveInReturn = MF.addLiveIn(TRI->getReturnAddressReg(MF),
                                         &AMDGPU::SGPR_64RegClass);
    MIRBuilder.buildCopy(ReturnAddrVReg, LiveInReturn);
  }

  // TODO: Handle CalleeSavedRegsViaCopy.

  MIRBuilder.insertInstr(Ret);
  return true;
}

Register AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
                                               Type *ParamTy,
                                               uint64_t Offset) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  LLT PtrType = getLLTForType(*PtrTy, DL);
  Register DstReg = MRI.createGenericVirtualRegister(PtrType);
  Register KernArgSegmentPtr =
      MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);

  Register OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
  MIRBuilder.buildConstant(OffsetReg, Offset);

  MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);

  return DstReg;
}

void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
                                        Type *ParamTy, uint64_t Offset,
                                        unsigned Align,
                                        Register DstReg) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
  Register PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);

  MachineMemOperand *MMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
                                       MachineMemOperand::MODereferenceable |
                                       MachineMemOperand::MOInvariant,
                              TypeSize, Align);

  MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
}

// Allocate special inputs passed in user SGPRs.
static void allocateHSAUserSGPRs(CCState &CCInfo,
                                 MachineIRBuilder &MIRBuilder,
                                 MachineFunction &MF,
                                 const SIRegisterInfo &TRI,
                                 SIMachineFunctionInfo &Info) {
  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
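  // The checks below follow the fixed user SGPR ordering of the HSA ABI;
  // each add* call reserves the next available user SGPRs for that input.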
  if (Info.hasPrivateSegmentBuffer()) {
    unsigned PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
    CCInfo.AllocateReg(PrivateSegmentBufferReg);
  }

  if (Info.hasDispatchPtr()) {
    unsigned DispatchPtrReg = Info.addDispatchPtr(TRI);
    MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchPtrReg);
  }

  if (Info.hasQueuePtr()) {
    unsigned QueuePtrReg = Info.addQueuePtr(TRI);
    MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(QueuePtrReg);
  }

  if (Info.hasKernargSegmentPtr()) {
    MachineRegisterInfo &MRI = MF.getRegInfo();
    Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
    const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
    Register VReg = MRI.createGenericVirtualRegister(P4);
    MRI.addLiveIn(InputPtrReg, VReg);
    MIRBuilder.getMBB().addLiveIn(InputPtrReg);
    MIRBuilder.buildCopy(VReg, InputPtrReg);
    CCInfo.AllocateReg(InputPtrReg);
  }

  if (Info.hasDispatchID()) {
    unsigned DispatchIDReg = Info.addDispatchID(TRI);
    MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchIDReg);
  }

  if (Info.hasFlatScratchInit()) {
    unsigned FlatScratchInitReg = Info.addFlatScratchInit(TRI);
    MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(FlatScratchInitReg);
  }

  // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we
  // read these from the dispatch pointer.
}

bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
    MachineIRBuilder &MIRBuilder, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();

  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

  allocateHSAUserSGPRs(CCInfo, MIRBuilder, MF, *TRI, *Info);

  unsigned i = 0;
  const unsigned KernArgBaseAlign = 16;
  const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
  uint64_t ExplicitArgOffset = 0;

  // TODO: Align down to dword alignment and extract bits for extending loads.
  for (auto &Arg : F.args()) {
    Type *ArgTy = Arg.getType();
    unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
    if (AllocSize == 0)
      continue;

    unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);

    uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
    ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;

    ArrayRef<Register> OrigArgRegs = VRegs[i];
    Register ArgReg =
        OrigArgRegs.size() == 1
            ? OrigArgRegs[0]
            : MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL));
    unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
    ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
    lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, ArgReg);
    if (OrigArgRegs.size() > 1)
      unpackRegs(OrigArgRegs, ArgReg, ArgTy, MIRBuilder);
    ++i;
  }

  TLI.allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
  TLI.allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
  return true;
}

// TODO: Move this to generic code
static void packSplitRegsToOrigType(MachineIRBuilder &MIRBuilder,
                                    ArrayRef<Register> OrigRegs,
                                    ArrayRef<Register> Regs,
                                    LLT LLTy,
                                    LLT PartLLT) {
  if (!LLTy.isVector() && !PartLLT.isVector()) {
    MIRBuilder.buildMerge(OrigRegs[0], Regs);
    return;
  }

  if (LLTy.isVector() && PartLLT.isVector()) {
    assert(LLTy.getElementType() == PartLLT.getElementType());

    int DstElts = LLTy.getNumElements();
    int PartElts = PartLLT.getNumElements();
    if (DstElts % PartElts == 0)
      MIRBuilder.buildConcatVectors(OrigRegs[0], Regs);
    else {
      // Deal with v3s16 split into v2s16
      assert(PartElts == 2 && DstElts % 2 != 0);
      int RoundedElts = PartElts * ((DstElts + PartElts - 1) / PartElts);

      LLT RoundedDestTy = LLT::vector(RoundedElts, PartLLT.getElementType());
      auto RoundedConcat = MIRBuilder.buildConcatVectors(RoundedDestTy, Regs);
      MIRBuilder.buildExtract(OrigRegs[0], RoundedConcat, 0);
    }

    return;
  }

  assert(LLTy.isVector() && !PartLLT.isVector());

  LLT DstEltTy = LLTy.getElementType();
  if (DstEltTy == PartLLT) {
    // Vector was trivially scalarized.
    MIRBuilder.buildBuildVector(OrigRegs[0], Regs);
  } else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) {
    // Deal with vector with 64-bit elements decomposed to 32-bit
    // registers. Need to create intermediate 64-bit elements.
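    // For example (illustrative values): a <2 x s64> value arriving as four
    // s32 parts is rebuilt by merging each pair of s32 parts into an s64,
    // then combining the s64 elements with G_BUILD_VECTOR.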
    SmallVector<Register, 8> EltMerges;
    int PartsPerElt = DstEltTy.getSizeInBits() / PartLLT.getSizeInBits();

    assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0);

    for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) {
      auto Merge = MIRBuilder.buildMerge(DstEltTy,
                                         Regs.take_front(PartsPerElt));
      EltMerges.push_back(Merge.getReg(0));
      Regs = Regs.drop_front(PartsPerElt);
    }

    MIRBuilder.buildBuildVector(OrigRegs[0], EltMerges);
  } else {
    // Vector was split, and elements promoted to a wider type.
    LLT BVType = LLT::vector(LLTy.getNumElements(), PartLLT);
    auto BV = MIRBuilder.buildBuildVector(BVType, Regs);
    MIRBuilder.buildTrunc(OrigRegs[0], BV);
  }
}

bool AMDGPUCallLowering::lowerFormalArguments(
    MachineIRBuilder &MIRBuilder, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  CallingConv::ID CC = F.getCallingConv();

  // The infrastructure for normal calling convention lowering is essentially
  // useless for kernels. We want to avoid any kind of legalization or argument
  // splitting.
  if (CC == CallingConv::AMDGPU_KERNEL)
    return lowerFormalArgumentsKernel(MIRBuilder, F, VRegs);

  const bool IsShader = AMDGPU::isShader(CC);
  const bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CC);

  MachineFunction &MF = MIRBuilder.getMF();
  MachineBasicBlock &MBB = MIRBuilder.getMBB();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());

  if (!IsEntryFunc) {
    Register ReturnAddrReg = TRI->getReturnAddressReg(MF);
    Register LiveInReturn = MF.addLiveIn(ReturnAddrReg,
                                         &AMDGPU::SGPR_64RegClass);
    MBB.addLiveIn(ReturnAddrReg);
    MIRBuilder.buildCopy(LiveInReturn, ReturnAddrReg);
  }

  if (Info->hasImplicitBufferPtr()) {
    Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
    MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(ImplicitBufferPtrReg);
  }

  SmallVector<ArgInfo, 32> SplitArgs;
  unsigned Idx = 0;
  unsigned PSInputNum = 0;

  for (auto &Arg : F.args()) {
    if (DL.getTypeStoreSize(Arg.getType()) == 0)
      continue;

    const bool InReg = Arg.hasAttribute(Attribute::InReg);

    // SGPR arguments to functions not implemented.
    if (!IsShader && InReg)
      return false;

    if (Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::Nest))
      return false;

    if (CC == CallingConv::AMDGPU_PS && !InReg && PSInputNum <= 15) {
      const bool ArgUsed = !Arg.use_empty();
      bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum);

      if (!SkipArg) {
        Info->markPSInputAllocated(PSInputNum);
        if (ArgUsed)
          Info->markPSInputEnabled(PSInputNum);
      }

      ++PSInputNum;

      if (SkipArg) {
        for (int I = 0, E = VRegs[Idx].size(); I != E; ++I)
          MIRBuilder.buildUndef(VRegs[Idx][I]);

        ++Idx;
        continue;
      }
    }

    ArgInfo OrigArg(VRegs[Idx], Arg.getType());
    setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F);

    splitToValueTypes(
        OrigArg, SplitArgs, DL, MRI, CC,
        // FIXME: We should probably be passing multiple registers to
        // handleAssignments to do this
        [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT, int VTSplitIdx) {
          packSplitRegsToOrigType(MIRBuilder, VRegs[Idx][VTSplitIdx], Regs,
                                  LLTy, PartLLT);
        });

    ++Idx;
  }

  // At least one interpolation mode must be enabled or else the GPU will
  // hang.
  //
  // Check PSInputAddr instead of PSInputEnable. The idea is that if the user
  // set PSInputAddr, the user wants to enable some bits after the compilation
  // based on run-time states. Since we can't know what the final PSInputEna
  // will look like, we shouldn't do anything here and the user should take
  // responsibility for the correct programming.
  //
  // Otherwise, the following restrictions apply:
  // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
  // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
  //   enabled too.
  if (CC == CallingConv::AMDGPU_PS) {
    if ((Info->getPSInputAddr() & 0x7F) == 0 ||
        ((Info->getPSInputAddr() & 0xF) == 0 &&
         Info->isPSInputAllocated(11))) {
      CCInfo.AllocateReg(AMDGPU::VGPR0);
      CCInfo.AllocateReg(AMDGPU::VGPR1);
      Info->markPSInputAllocated(0);
      Info->markPSInputEnabled(0);
    }

    if (Subtarget.isAmdPalOS()) {
      // For isAmdPalOS, the user does not enable some bits after compilation
      // based on run-time states; the register values being generated here are
      // the final ones set in hardware. Therefore we need to apply the
      // workaround to PSInputAddr and PSInputEnable together. (The case where
      // a bit is set in PSInputAddr but not PSInputEnable is where the frontend
      // set up an input arg for a particular interpolation mode, but nothing
      // uses that input arg. Really we should have an earlier pass that removes
      // such an arg.)
      unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
      if ((PsInputBits & 0x7F) == 0 ||
          ((PsInputBits & 0xF) == 0 &&
           (PsInputBits >> 11 & 1)))
        Info->markPSInputEnabled(
            countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined));
    }
  }

  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC, F.isVarArg());

  if (!MBB.empty())
    MIRBuilder.setInstr(*MBB.begin());

  FormalArgHandler Handler(MIRBuilder, MRI, AssignFn);
  if (!handleAssignments(CCInfo, ArgLocs, MIRBuilder, SplitArgs, Handler))
    return false;

  if (!IsEntryFunc) {
    // Special inputs come after user arguments.
    TLI.allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info);
  }

  // Start adding system SGPRs.
  if (IsEntryFunc) {
    TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsShader);
  } else {
    CCInfo.AllocateReg(Info->getScratchRSrcReg());
    CCInfo.AllocateReg(Info->getScratchWaveOffsetReg());
    CCInfo.AllocateReg(Info->getFrameOffsetReg());
    TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
  }

  // Move back to the end of the basic block.
  MIRBuilder.setMBB(MBB);

  return true;
}