//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUCallLowering.h"
#include "AMDGPU.h"
#include "AMDGPUISelLowering.h"
#include "AMDGPUSubtarget.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/LowLevelTypeImpl.h"

using namespace llvm;

namespace {

struct OutgoingValueHandler : public CallLowering::ValueHandler {
  OutgoingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                       MachineInstrBuilder MIB, CCAssignFn *AssignFn)
      : ValueHandler(B, MRI, AssignFn), MIB(MIB) {}

  MachineInstrBuilder MIB;

  bool isIncomingArgumentHandler() const override { return false; }

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    Register ExtReg;
    if (VA.getLocVT().getSizeInBits() < 32) {
      // 16-bit types are reported as legal for 32-bit registers. We need to
      // extend and do a 32-bit copy to avoid the verifier complaining about it.
      ExtReg = MIRBuilder.buildAnyExt(LLT::scalar(32), ValVReg).getReg(0);
    } else
      ExtReg = extendRegister(ValVReg, VA);

    // If this is a scalar return, insert a readfirstlane just in case the value
    // ends up in a VGPR.
    // FIXME: Assert this is a shader return.
    const SIRegisterInfo *TRI
      = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
    if (TRI->isSGPRReg(MRI, PhysReg)) {
      auto ToSGPR = MIRBuilder.buildIntrinsic(Intrinsic::amdgcn_readfirstlane,
                                              {MRI.getType(ExtReg)}, false)
        .addReg(ExtReg);
      ExtReg = ToSGPR.getReg(0);
    }

    MIRBuilder.buildCopy(PhysReg, ExtReg);
    MIB.addUse(PhysReg, RegState::Implicit);
  }

  bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo,
                 const CallLowering::ArgInfo &Info,
                 ISD::ArgFlagsTy Flags,
                 CCState &State) override {
    return AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
  }
};

struct IncomingArgHandler : public CallLowering::ValueHandler {
  uint64_t StackUsed = 0;

  IncomingArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                     CCAssignFn *AssignFn)
    : ValueHandler(B, MRI, AssignFn) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    auto &MFI = MIRBuilder.getMF().getFrameInfo();
    int FI = MFI.CreateFixedObject(Size, Offset, true);
    MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
    auto AddrReg = MIRBuilder.buildFrameIndex(
        LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32), FI);
    StackUsed = std::max(StackUsed, Size + Offset);
    return AddrReg.getReg(0);
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    markPhysRegUsed(PhysReg);

    if (VA.getLocVT().getSizeInBits() < 32) {
      // 16-bit types are reported as legal for 32-bit registers. We need to do
      // a 32-bit copy, and truncate to avoid the verifier complaining about it.
      auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);
      MIRBuilder.buildTrunc(ValVReg, Copy);
      return;
    }

    switch (VA.getLocInfo()) {
    case CCValAssign::LocInfo::SExt:
    case CCValAssign::LocInfo::ZExt:
    case CCValAssign::LocInfo::AExt: {
      auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
      MIRBuilder.buildTrunc(ValVReg, Copy);
      break;
    }
    default:
      MIRBuilder.buildCopy(ValVReg, PhysReg);
      break;
    }
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    MachineFunction &MF = MIRBuilder.getMF();
    unsigned Align = inferAlignmentFromPtrInfo(MF, MPO);

    auto MMO = MF.getMachineMemOperand(
        MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size,
        Align);
    MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
  }

  /// How the physical register gets marked varies between formal
  /// parameters (it's a basic-block live-in), and a call instruction
  /// (it's an implicit-def of the call).
  virtual void markPhysRegUsed(unsigned PhysReg) = 0;

  // FIXME: What is the point of this being a callback?
  bool isIncomingArgumentHandler() const override { return true; }
};

struct FormalArgHandler : public IncomingArgHandler {
  FormalArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                   CCAssignFn *AssignFn)
    : IncomingArgHandler(B, MRI, AssignFn) {}

  void markPhysRegUsed(unsigned PhysReg) override {
    MIRBuilder.getMBB().addLiveIn(PhysReg);
  }
};

} // end anonymous namespace

AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
  : CallLowering(&TLI) {
}

// FIXME: Compatibility shim
static ISD::NodeType extOpcodeToISDExtOpcode(unsigned MIOpc) {
  switch (MIOpc) {
  case TargetOpcode::G_SEXT:
    return ISD::SIGN_EXTEND;
  case TargetOpcode::G_ZEXT:
    return ISD::ZERO_EXTEND;
  case TargetOpcode::G_ANYEXT:
    return ISD::ANY_EXTEND;
  default:
    llvm_unreachable("not an extend opcode");
  }
}

void AMDGPUCallLowering::splitToValueTypes(
    MachineIRBuilder &B,
    const ArgInfo &OrigArg, unsigned OrigArgIdx,
    SmallVectorImpl<ArgInfo> &SplitArgs,
    const DataLayout &DL, CallingConv::ID CallConv,
    SplitArgTy PerformArgSplit) const {
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  LLVMContext &Ctx = OrigArg.Ty->getContext();

  if (OrigArg.Ty->isVoidTy())
    return;

  SmallVector<EVT, 4> SplitVTs;
  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs);

  assert(OrigArg.Regs.size() == SplitVTs.size());

  int SplitIdx = 0;
  for (EVT VT : SplitVTs) {
    Register Reg = OrigArg.Regs[SplitIdx];
    Type *Ty = VT.getTypeForEVT(Ctx);
    LLT LLTy = getLLTForType(*Ty, DL);

    if (OrigArgIdx == AttributeList::ReturnIndex && VT.isScalarInteger()) {
      unsigned ExtendOp = TargetOpcode::G_ANYEXT;
      if (OrigArg.Flags[0].isSExt()) {
        assert(OrigArg.Regs.size() == 1 && "expect only simple return values");
        ExtendOp = TargetOpcode::G_SEXT;
      } else if (OrigArg.Flags[0].isZExt()) {
        assert(OrigArg.Regs.size() == 1 && "expect only simple return values");
        ExtendOp = TargetOpcode::G_ZEXT;
      }

      EVT ExtVT = TLI.getTypeForExtReturn(Ctx, VT,
                                          extOpcodeToISDExtOpcode(ExtendOp));
      if (ExtVT != VT) {
        VT = ExtVT;
        Ty = ExtVT.getTypeForEVT(Ctx);
        LLTy = getLLTForType(*Ty, DL);
        Reg = B.buildInstr(ExtendOp, {LLTy}, {Reg}).getReg(0);
      }
    }

    unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT);
    MVT RegVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);

    if (NumParts == 1) {
      // Fixup EVTs to an MVT.
      //
      // FIXME: This is pretty hacky. Why do we have to split the type
      // legalization logic between here and handleAssignments?
      if (OrigArgIdx != AttributeList::ReturnIndex && VT != RegVT) {
        assert(VT.getSizeInBits() < 32 &&
               "unexpected illegal type");
        Ty = Type::getInt32Ty(Ctx);
        Register OrigReg = Reg;
        Reg = B.getMRI()->createGenericVirtualRegister(LLT::scalar(32));
        B.buildTrunc(OrigReg, Reg);
      }

      // No splitting to do, but we want to replace the original type (e.g. [1 x
      // double] -> double).
      SplitArgs.emplace_back(Reg, Ty, OrigArg.Flags, OrigArg.IsFixed);

      ++SplitIdx;
      continue;
    }

    SmallVector<Register, 8> SplitRegs;
    Type *PartTy = EVT(RegVT).getTypeForEVT(Ctx);
    LLT PartLLT = getLLTForType(*PartTy, DL);
    MachineRegisterInfo &MRI = *B.getMRI();

    // FIXME: Should we be reporting all of the part registers for a single
    // argument, and let handleAssignments take care of the repacking?
    for (unsigned i = 0; i < NumParts; ++i) {
      Register PartReg = MRI.createGenericVirtualRegister(PartLLT);
      SplitRegs.push_back(PartReg);
      SplitArgs.emplace_back(ArrayRef<Register>(PartReg), PartTy, OrigArg.Flags);
    }

    PerformArgSplit(SplitRegs, Reg, LLTy, PartLLT, SplitIdx);

    ++SplitIdx;
  }
}

// Get the appropriate type to make \p OrigTy \p Factor times bigger.
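// For example, s32 with a factor of 3 becomes s96, and <2 x s16> with a
// factor of 2 becomes <4 x s16>.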
static LLT getMultipleType(LLT OrigTy, int Factor) {
  if (OrigTy.isVector()) {
    return LLT::vector(OrigTy.getNumElements() * Factor,
                       OrigTy.getElementType());
  }

  return LLT::scalar(OrigTy.getSizeInBits() * Factor);
}

// TODO: Move to generic code
static void unpackRegsToOrigType(MachineIRBuilder &B,
                                 ArrayRef<Register> DstRegs,
                                 Register SrcReg,
                                 const CallLowering::ArgInfo &Info,
                                 LLT SrcTy,
                                 LLT PartTy) {
  assert(DstRegs.size() > 1 && "Nothing to unpack");

  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned PartSize = PartTy.getSizeInBits();

  if (SrcTy.isVector() && !PartTy.isVector() &&
      PartSize > SrcTy.getElementType().getSizeInBits()) {
    // Vector was scalarized, and the elements extended.
    auto UnmergeToEltTy = B.buildUnmerge(SrcTy.getElementType(),
                                         SrcReg);
    for (int i = 0, e = DstRegs.size(); i != e; ++i)
      B.buildAnyExt(DstRegs[i], UnmergeToEltTy.getReg(i));
    return;
  }

  if (SrcSize % PartSize == 0) {
    B.buildUnmerge(DstRegs, SrcReg);
    return;
  }

  const int NumRoundedParts = (SrcSize + PartSize - 1) / PartSize;

  LLT BigTy = getMultipleType(PartTy, NumRoundedParts);
  auto ImpDef = B.buildUndef(BigTy);

  auto Big = B.buildInsert(BigTy, ImpDef.getReg(0), SrcReg, 0).getReg(0);

  int64_t Offset = 0;
  for (unsigned i = 0, e = DstRegs.size(); i != e; ++i, Offset += PartSize)
    B.buildExtract(DstRegs[i], Big, Offset);
}

/// Lower the return value for the already existing \p Ret. This assumes that
/// \p B's insertion point is correct.
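/// The return value is split into legal pieces, extended as required by the
/// return attributes, and each piece is copied into the physical register
/// chosen by the calling convention and attached to \p Ret as an implicit use.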
bool AMDGPUCallLowering::lowerReturnVal(MachineIRBuilder &B,
                                        const Value *Val, ArrayRef<Register> VRegs,
                                        MachineInstrBuilder &Ret) const {
  if (!Val)
    return true;

  auto &MF = B.getMF();
  const auto &F = MF.getFunction();
  const DataLayout &DL = MF.getDataLayout();
  MachineRegisterInfo *MRI = B.getMRI();

  CallingConv::ID CC = F.getCallingConv();
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();

  ArgInfo OrigRetInfo(VRegs, Val->getType());
  setArgFlags(OrigRetInfo, AttributeList::ReturnIndex, DL, F);
  SmallVector<ArgInfo, 4> SplitRetInfos;

  splitToValueTypes(
    B, OrigRetInfo, AttributeList::ReturnIndex, SplitRetInfos, DL, CC,
    [&](ArrayRef<Register> Regs, Register SrcReg, LLT LLTy, LLT PartLLT,
        int VTSplitIdx) {
      unpackRegsToOrigType(B, Regs, SrcReg,
                           SplitRetInfos[VTSplitIdx],
                           LLTy, PartLLT);
    });

  CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg());
  OutgoingValueHandler RetHandler(B, *MRI, Ret, AssignFn);
  return handleAssignments(B, SplitRetInfos, RetHandler);
}

bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B,
                                     const Value *Val,
                                     ArrayRef<Register> VRegs) const {

  MachineFunction &MF = B.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MFI->setIfReturnsVoid(!Val);

  assert(!Val == VRegs.empty() && "Return value without a vreg");

  CallingConv::ID CC = B.getMF().getFunction().getCallingConv();
  const bool IsShader = AMDGPU::isShader(CC);
  const bool IsWaveEnd = (IsShader && MFI->returnsVoid()) ||
                         AMDGPU::isKernel(CC);
  if (IsWaveEnd) {
    B.buildInstr(AMDGPU::S_ENDPGM)
      .addImm(0);
    return true;
  }

  auto const &ST = MF.getSubtarget<GCNSubtarget>();

  unsigned ReturnOpc =
      IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::S_SETPC_B64_return;

  auto Ret = B.buildInstrNoInsert(ReturnOpc);
  Register ReturnAddrVReg;
  if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
    ReturnAddrVReg = MRI.createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass);
    Ret.addUse(ReturnAddrVReg);
  }

  if (!lowerReturnVal(B, Val, VRegs, Ret))
    return false;

  if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
    const SIRegisterInfo *TRI = ST.getRegisterInfo();
    Register LiveInReturn = MF.addLiveIn(TRI->getReturnAddressReg(MF),
                                         &AMDGPU::SGPR_64RegClass);
    B.buildCopy(ReturnAddrVReg, LiveInReturn);
  }

  // TODO: Handle CalleeSavedRegsViaCopy.

  B.insertInstr(Ret);
  return true;
}

Register AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &B,
                                               Type *ParamTy,
                                               uint64_t Offset) const {

  MachineFunction &MF = B.getMF();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  LLT PtrType = getLLTForType(*PtrTy, DL);
  Register KernArgSegmentPtr =
    MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);

  auto OffsetReg = B.buildConstant(LLT::scalar(64), Offset);

  return B.buildPtrAdd(PtrType, KernArgSegmentVReg, OffsetReg).getReg(0);
}

void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &B,
                                        Type *ParamTy, uint64_t Offset,
                                        unsigned Align,
                                        Register DstReg) const {
  MachineFunction &MF = B.getMF();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
  Register PtrReg = lowerParameterPtr(B, ParamTy, Offset);

  MachineMemOperand *MMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
                                       MachineMemOperand::MODereferenceable |
                                       MachineMemOperand::MOInvariant,
                              TypeSize, Align);

  B.buildLoad(DstReg, PtrReg, *MMO);
}

// Allocate special inputs passed in user SGPRs.
static void allocateHSAUserSGPRs(CCState &CCInfo,
                                 MachineIRBuilder &B,
                                 MachineFunction &MF,
                                 const SIRegisterInfo &TRI,
                                 SIMachineFunctionInfo &Info) {
  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
  if (Info.hasPrivateSegmentBuffer()) {
    unsigned PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
    CCInfo.AllocateReg(PrivateSegmentBufferReg);
  }

  if (Info.hasDispatchPtr()) {
    unsigned DispatchPtrReg = Info.addDispatchPtr(TRI);
    MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchPtrReg);
  }

  if (Info.hasQueuePtr()) {
    unsigned QueuePtrReg = Info.addQueuePtr(TRI);
    MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(QueuePtrReg);
  }

  if (Info.hasKernargSegmentPtr()) {
    MachineRegisterInfo &MRI = MF.getRegInfo();
    Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
    const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
    Register VReg = MRI.createGenericVirtualRegister(P4);
    MRI.addLiveIn(InputPtrReg, VReg);
    B.getMBB().addLiveIn(InputPtrReg);
    B.buildCopy(VReg, InputPtrReg);
    CCInfo.AllocateReg(InputPtrReg);
  }

  if (Info.hasDispatchID()) {
    unsigned DispatchIDReg = Info.addDispatchID(TRI);
    MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchIDReg);
  }

  if (Info.hasFlatScratchInit()) {
    unsigned FlatScratchInitReg = Info.addFlatScratchInit(TRI);
    MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(FlatScratchInitReg);
  }

  // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
  // these from the dispatch pointer.
}

bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
    MachineIRBuilder &B, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  MachineFunction &MF = B.getMF();
  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();

  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

  allocateHSAUserSGPRs(CCInfo, B, MF, *TRI, *Info);

  unsigned i = 0;
  const unsigned KernArgBaseAlign = 16;
  const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
  uint64_t ExplicitArgOffset = 0;

  // TODO: Align down to dword alignment and extract bits for extending loads.
  for (auto &Arg : F.args()) {
    Type *ArgTy = Arg.getType();
    unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
    if (AllocSize == 0)
      continue;

    unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);

    uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
    ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;

    ArrayRef<Register> OrigArgRegs = VRegs[i];
    Register ArgReg =
      OrigArgRegs.size() == 1
      ? OrigArgRegs[0]
      : MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL));
    unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
    ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
    lowerParameter(B, ArgTy, ArgOffset, Align, ArgReg);
    if (OrigArgRegs.size() > 1)
      unpackRegs(OrigArgRegs, ArgReg, ArgTy, B);
    ++i;
  }

  TLI.allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
  TLI.allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
  return true;
}

/// Pack values \p SrcRegs to cover the vector type result \p DstRegs.
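/// If the source parts do not evenly cover the result type (e.g. a v3s16
/// result built from v2s16 pieces), pad the sources with undef, concatenate,
/// and unmerge, leaving the excess results dead.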
static MachineInstrBuilder mergeVectorRegsToResultRegs(
  MachineIRBuilder &B, ArrayRef<Register> DstRegs, ArrayRef<Register> SrcRegs) {
  MachineRegisterInfo &MRI = *B.getMRI();
  LLT LLTy = MRI.getType(DstRegs[0]);
  LLT PartLLT = MRI.getType(SrcRegs[0]);

  // Deal with v3s16 split into v2s16
  LLT LCMTy = getLCMType(LLTy, PartLLT);
  if (LCMTy == LLTy) {
    // Common case where no padding is needed.
    assert(DstRegs.size() == 1);
    return B.buildConcatVectors(DstRegs[0], SrcRegs);
  }

  const int NumWide = LCMTy.getSizeInBits() / PartLLT.getSizeInBits();
  Register Undef = B.buildUndef(PartLLT).getReg(0);

  // Build vector of undefs.
  SmallVector<Register, 8> WidenedSrcs(NumWide, Undef);

  // Replace the first sources with the real registers.
  std::copy(SrcRegs.begin(), SrcRegs.end(), WidenedSrcs.begin());

  auto Widened = B.buildConcatVectors(LCMTy, WidenedSrcs);
  int NumDst = LCMTy.getSizeInBits() / LLTy.getSizeInBits();

  SmallVector<Register, 8> PadDstRegs(NumDst);
  std::copy(DstRegs.begin(), DstRegs.end(), PadDstRegs.begin());

  // Create the excess dead defs for the unmerge.
  for (int I = DstRegs.size(); I != NumDst; ++I)
    PadDstRegs[I] = MRI.createGenericVirtualRegister(LLTy);

  return B.buildUnmerge(PadDstRegs, Widened);
}

// TODO: Move this to generic code
static void packSplitRegsToOrigType(MachineIRBuilder &B,
                                    ArrayRef<Register> OrigRegs,
                                    ArrayRef<Register> Regs,
                                    LLT LLTy,
                                    LLT PartLLT) {
  MachineRegisterInfo &MRI = *B.getMRI();

  if (!LLTy.isVector() && !PartLLT.isVector()) {
    assert(OrigRegs.size() == 1);
    LLT OrigTy = MRI.getType(OrigRegs[0]);

    unsigned SrcSize = PartLLT.getSizeInBits() * Regs.size();
    if (SrcSize == OrigTy.getSizeInBits())
      B.buildMerge(OrigRegs[0], Regs);
    else {
      auto Widened = B.buildMerge(LLT::scalar(SrcSize), Regs);
      B.buildTrunc(OrigRegs[0], Widened);
    }

    return;
  }

  if (LLTy.isVector() && PartLLT.isVector()) {
    assert(OrigRegs.size() == 1);
    assert(LLTy.getElementType() == PartLLT.getElementType());
    mergeVectorRegsToResultRegs(B, OrigRegs, Regs);
    return;
  }

  assert(LLTy.isVector() && !PartLLT.isVector());

  LLT DstEltTy = LLTy.getElementType();

  // Pointer information was discarded. We'll need to coerce some register types
  // to avoid violating type constraints.
  LLT RealDstEltTy = MRI.getType(OrigRegs[0]).getElementType();

  assert(DstEltTy.getSizeInBits() == RealDstEltTy.getSizeInBits());

  if (DstEltTy == PartLLT) {
    // Vector was trivially scalarized.

    if (RealDstEltTy.isPointer()) {
      for (Register Reg : Regs)
        MRI.setType(Reg, RealDstEltTy);
    }

    B.buildBuildVector(OrigRegs[0], Regs);
  } else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) {
    // Deal with vector with 64-bit elements decomposed to 32-bit
    // registers. Need to create intermediate 64-bit elements.
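    // For example, a <2 x s64> destination passed as four s32 parts is rebuilt
    // by merging each pair of s32 registers into an s64 element and then
    // building the vector from those elements.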
    SmallVector<Register, 8> EltMerges;
    int PartsPerElt = DstEltTy.getSizeInBits() / PartLLT.getSizeInBits();

    assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0);

    for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) {
      auto Merge = B.buildMerge(RealDstEltTy, Regs.take_front(PartsPerElt));
      // Fix the type in case this is really a vector of pointers.
      MRI.setType(Merge.getReg(0), RealDstEltTy);
      EltMerges.push_back(Merge.getReg(0));
      Regs = Regs.drop_front(PartsPerElt);
    }

    B.buildBuildVector(OrigRegs[0], EltMerges);
  } else {
    // Vector was split, and elements promoted to a wider type.
    LLT BVType = LLT::vector(LLTy.getNumElements(), PartLLT);
    auto BV = B.buildBuildVector(BVType, Regs);
    B.buildTrunc(OrigRegs[0], BV);
  }
}

bool AMDGPUCallLowering::lowerFormalArguments(
    MachineIRBuilder &B, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  CallingConv::ID CC = F.getCallingConv();

  // The infrastructure for normal calling convention lowering is essentially
  // useless for kernels. We want to avoid any kind of legalization or argument
  // splitting.
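  // Kernel arguments are instead loaded directly from the kernarg segment; see
  // lowerFormalArgumentsKernel.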
  if (CC == CallingConv::AMDGPU_KERNEL)
    return lowerFormalArgumentsKernel(B, F, VRegs);

  const bool IsShader = AMDGPU::isShader(CC);
  const bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CC);

  MachineFunction &MF = B.getMF();
  MachineBasicBlock &MBB = B.getMBB();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());

  if (!IsEntryFunc) {
    Register ReturnAddrReg = TRI->getReturnAddressReg(MF);
    Register LiveInReturn = MF.addLiveIn(ReturnAddrReg,
                                         &AMDGPU::SGPR_64RegClass);
    MBB.addLiveIn(ReturnAddrReg);
    B.buildCopy(LiveInReturn, ReturnAddrReg);
  }

  if (Info->hasImplicitBufferPtr()) {
    Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
    MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(ImplicitBufferPtrReg);
  }

  SmallVector<ArgInfo, 32> SplitArgs;
  unsigned Idx = 0;
  unsigned PSInputNum = 0;

  for (auto &Arg : F.args()) {
    if (DL.getTypeStoreSize(Arg.getType()) == 0)
      continue;

    const bool InReg = Arg.hasAttribute(Attribute::InReg);

    // SGPR arguments to functions not implemented.
    if (!IsShader && InReg)
      return false;

    if (Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::Nest))
      return false;

    if (CC == CallingConv::AMDGPU_PS && !InReg && PSInputNum <= 15) {
      const bool ArgUsed = !Arg.use_empty();
      bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum);

      if (!SkipArg) {
        Info->markPSInputAllocated(PSInputNum);
        if (ArgUsed)
          Info->markPSInputEnabled(PSInputNum);
      }

      ++PSInputNum;

      if (SkipArg) {
        for (int I = 0, E = VRegs[Idx].size(); I != E; ++I)
          B.buildUndef(VRegs[Idx][I]);

        ++Idx;
        continue;
      }
    }

    ArgInfo OrigArg(VRegs[Idx], Arg.getType());
    const unsigned OrigArgIdx = Idx + AttributeList::FirstArgIndex;
    setArgFlags(OrigArg, OrigArgIdx, DL, F);

    splitToValueTypes(
      B, OrigArg, OrigArgIdx, SplitArgs, DL, CC,
      // FIXME: We should probably be passing multiple registers to
      // handleAssignments to do this
      [&](ArrayRef<Register> Regs, Register DstReg,
          LLT LLTy, LLT PartLLT, int VTSplitIdx) {
        assert(DstReg == VRegs[Idx][VTSplitIdx]);
        packSplitRegsToOrigType(B, VRegs[Idx][VTSplitIdx], Regs,
                                LLTy, PartLLT);
      });

    ++Idx;
  }

  // At least one interpolation mode must be enabled or else the GPU will
  // hang.
  //
  // Check PSInputAddr instead of PSInputEnable. The idea is that if the user
  // set PSInputAddr, the user wants to enable some bits after the compilation
  // based on run-time states. Since we can't know what the final PSInputEna
  // will look like, we shouldn't do anything here and the user should take
  // responsibility for the correct programming.
  //
  // Otherwise, the following restrictions apply:
  // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
  // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
  //   enabled too.
  if (CC == CallingConv::AMDGPU_PS) {
    if ((Info->getPSInputAddr() & 0x7F) == 0 ||
        ((Info->getPSInputAddr() & 0xF) == 0 &&
         Info->isPSInputAllocated(11))) {
      CCInfo.AllocateReg(AMDGPU::VGPR0);
      CCInfo.AllocateReg(AMDGPU::VGPR1);
      Info->markPSInputAllocated(0);
      Info->markPSInputEnabled(0);
    }

    if (Subtarget.isAmdPalOS()) {
      // For isAmdPalOS, the user does not enable some bits after compilation
      // based on run-time states; the register values being generated here are
      // the final ones set in hardware. Therefore we need to apply the
      // workaround to PSInputAddr and PSInputEnable together. (The case where
      // a bit is set in PSInputAddr but not PSInputEnable is where the frontend
      // set up an input arg for a particular interpolation mode, but nothing
      // uses that input arg. Really we should have an earlier pass that removes
      // such an arg.)
      unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
      if ((PsInputBits & 0x7F) == 0 ||
          ((PsInputBits & 0xF) == 0 &&
           (PsInputBits >> 11 & 1)))
        Info->markPSInputEnabled(
          countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined));
    }
  }

  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC, F.isVarArg());

  if (!MBB.empty())
    B.setInstr(*MBB.begin());

  FormalArgHandler Handler(B, MRI, AssignFn);
  if (!handleAssignments(CCInfo, ArgLocs, B, SplitArgs, Handler))
    return false;

  if (!IsEntryFunc) {
    // Special inputs come after user arguments.
    TLI.allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info);
  }

  // Start adding system SGPRs.
  if (IsEntryFunc) {
    TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsShader);
  } else {
    CCInfo.AllocateReg(Info->getScratchRSrcReg());
    TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
  }

  // Move back to the end of the basic block.
  B.setMBB(MBB);

  return true;
}