//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUCallLowering.h"
#include "AMDGPU.h"
#include "AMDGPUISelLowering.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/LowLevelTypeImpl.h"

using namespace llvm;

namespace {

struct OutgoingValueHandler : public CallLowering::ValueHandler {
  OutgoingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                       MachineInstrBuilder MIB, CCAssignFn *AssignFn)
      : ValueHandler(B, MRI, AssignFn), MIB(MIB) {}

  MachineInstrBuilder MIB;

  bool isIncomingArgumentHandler() const override { return false; }

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    Register ExtReg;
    if (VA.getLocVT().getSizeInBits() < 32) {
      // 16-bit types are reported as legal for 32-bit registers. We need to
      // extend and do a 32-bit copy to avoid the verifier complaining about it.
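      // For example, an i16 return value is any-extended to i32 here, and the
      // i32 is what actually gets copied into the 32-bit physical register.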
      ExtReg = MIRBuilder.buildAnyExt(LLT::scalar(32), ValVReg).getReg(0);
    } else
      ExtReg = extendRegister(ValVReg, VA);

    // If this is a scalar return, insert a readfirstlane just in case the value
    // ends up in a VGPR.
    // FIXME: Assert this is a shader return.
    const SIRegisterInfo *TRI
      = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
    if (TRI->isSGPRReg(MRI, PhysReg)) {
      auto ToSGPR = MIRBuilder.buildIntrinsic(Intrinsic::amdgcn_readfirstlane,
                                              {MRI.getType(ExtReg)}, false)
        .addReg(ExtReg);
      ExtReg = ToSGPR.getReg(0);
    }

    MIRBuilder.buildCopy(PhysReg, ExtReg);
    MIB.addUse(PhysReg, RegState::Implicit);
  }

  bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo,
                 const CallLowering::ArgInfo &Info,
                 ISD::ArgFlagsTy Flags,
                 CCState &State) override {
    return AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
  }
};

struct IncomingArgHandler : public CallLowering::ValueHandler {
  uint64_t StackUsed = 0;

  IncomingArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                     CCAssignFn *AssignFn)
    : ValueHandler(B, MRI, AssignFn) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    auto &MFI = MIRBuilder.getMF().getFrameInfo();
    int FI = MFI.CreateFixedObject(Size, Offset, true);
    MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
    auto AddrReg = MIRBuilder.buildFrameIndex(
        LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32), FI);
    StackUsed = std::max(StackUsed, Size + Offset);
    return AddrReg.getReg(0);
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    markPhysRegUsed(PhysReg);

    if (VA.getLocVT().getSizeInBits() < 32) {
      // 16-bit types are reported as legal for 32-bit registers. We need to do
      // a 32-bit copy, and truncate to avoid the verifier complaining about it.
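      // e.g. an incoming i16 argument is received as the full 32-bit register
      // and then truncated back down to the declared 16-bit virtual register.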
      auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);
      MIRBuilder.buildTrunc(ValVReg, Copy);
      return;
    }

    switch (VA.getLocInfo()) {
    case CCValAssign::LocInfo::SExt:
    case CCValAssign::LocInfo::ZExt:
    case CCValAssign::LocInfo::AExt: {
      auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
      MIRBuilder.buildTrunc(ValVReg, Copy);
      break;
    }
    default:
      MIRBuilder.buildCopy(ValVReg, PhysReg);
      break;
    }
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    MachineFunction &MF = MIRBuilder.getMF();

    // FIXME: Get alignment
    auto MMO = MF.getMachineMemOperand(
        MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size,
        inferAlignFromPtrInfo(MF, MPO));
    MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
  }

  /// How the physical register gets marked varies between formal
  /// parameters (it's a basic-block live-in), and a call instruction
  /// (it's an implicit-def of the call).
  virtual void markPhysRegUsed(unsigned PhysReg) = 0;

  // FIXME: What is the point of this being a callback?
  bool isIncomingArgumentHandler() const override { return true; }
};

struct FormalArgHandler : public IncomingArgHandler {
  FormalArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                   CCAssignFn *AssignFn)
    : IncomingArgHandler(B, MRI, AssignFn) {}

  void markPhysRegUsed(unsigned PhysReg) override {
    MIRBuilder.getMBB().addLiveIn(PhysReg);
  }
};

}

AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
  : CallLowering(&TLI) {
}

// FIXME: Compatibility shim
static ISD::NodeType extOpcodeToISDExtOpcode(unsigned MIOpc) {
  switch (MIOpc) {
  case TargetOpcode::G_SEXT:
    return ISD::SIGN_EXTEND;
  case TargetOpcode::G_ZEXT:
    return ISD::ZERO_EXTEND;
  case TargetOpcode::G_ANYEXT:
    return ISD::ANY_EXTEND;
  default:
    llvm_unreachable("not an extend opcode");
  }
}

void AMDGPUCallLowering::splitToValueTypes(
    MachineIRBuilder &B,
    const ArgInfo &OrigArg, unsigned OrigArgIdx,
    SmallVectorImpl<ArgInfo> &SplitArgs,
    const DataLayout &DL, CallingConv::ID CallConv,
    SplitArgTy PerformArgSplit) const {
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  LLVMContext &Ctx = OrigArg.Ty->getContext();

  if (OrigArg.Ty->isVoidTy())
    return;

  SmallVector<EVT, 4> SplitVTs;
  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs);

  assert(OrigArg.Regs.size() == SplitVTs.size());

  int SplitIdx = 0;
  for (EVT VT : SplitVTs) {
    Register Reg = OrigArg.Regs[SplitIdx];
    Type *Ty = VT.getTypeForEVT(Ctx);
    LLT LLTy = getLLTForType(*Ty, DL);

    if (OrigArgIdx == AttributeList::ReturnIndex && VT.isScalarInteger()) {
      unsigned ExtendOp = TargetOpcode::G_ANYEXT;
      if (OrigArg.Flags[0].isSExt()) {
        assert(OrigArg.Regs.size() == 1 && "expect only simple return values");
        ExtendOp = TargetOpcode::G_SEXT;
      } else if (OrigArg.Flags[0].isZExt()) {
        assert(OrigArg.Regs.size() == 1 && "expect only simple return values");
        ExtendOp = TargetOpcode::G_ZEXT;
      }

      EVT ExtVT = TLI.getTypeForExtReturn(Ctx, VT,
                                          extOpcodeToISDExtOpcode(ExtendOp));
      if (ExtVT != VT) {
        VT = ExtVT;
        Ty = ExtVT.getTypeForEVT(Ctx);
        LLTy = getLLTForType(*Ty, DL);
        Reg = B.buildInstr(ExtendOp, {LLTy}, {Reg}).getReg(0);
      }
    }

    unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT);
    MVT RegVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);

    if (NumParts == 1) {
      // Fixup EVTs to an MVT.
      //
      // FIXME: This is pretty hacky. Why do we have to split the type
      // legalization logic between here and handleAssignments?
      if (OrigArgIdx != AttributeList::ReturnIndex && VT != RegVT) {
        assert(VT.getSizeInBits() < 32 &&
               "unexpected illegal type");
        Ty = Type::getInt32Ty(Ctx);
        Register OrigReg = Reg;
        Reg = B.getMRI()->createGenericVirtualRegister(LLT::scalar(32));
        B.buildTrunc(OrigReg, Reg);
      }

      // No splitting to do, but we want to replace the original type (e.g. [1 x
      // double] -> double).
      SplitArgs.emplace_back(Reg, Ty, OrigArg.Flags, OrigArg.IsFixed);

      ++SplitIdx;
      continue;
    }

    SmallVector<Register, 8> SplitRegs;
    Type *PartTy = EVT(RegVT).getTypeForEVT(Ctx);
    LLT PartLLT = getLLTForType(*PartTy, DL);
    MachineRegisterInfo &MRI = *B.getMRI();

    // FIXME: Should we be reporting all of the part registers for a single
    // argument, and let handleAssignments take care of the repacking?
    for (unsigned i = 0; i < NumParts; ++i) {
      Register PartReg = MRI.createGenericVirtualRegister(PartLLT);
      SplitRegs.push_back(PartReg);
      SplitArgs.emplace_back(ArrayRef<Register>(PartReg), PartTy, OrigArg.Flags);
    }

    PerformArgSplit(SplitRegs, Reg, LLTy, PartLLT, SplitIdx);

    ++SplitIdx;
  }
}

// Get the appropriate type to make \p OrigTy \p Factor times bigger.
static LLT getMultipleType(LLT OrigTy, int Factor) {
  if (OrigTy.isVector()) {
    return LLT::vector(OrigTy.getNumElements() * Factor,
                       OrigTy.getElementType());
  }

  return LLT::scalar(OrigTy.getSizeInBits() * Factor);
}

// TODO: Move to generic code
static void unpackRegsToOrigType(MachineIRBuilder &B,
                                 ArrayRef<Register> DstRegs,
                                 Register SrcReg,
                                 const CallLowering::ArgInfo &Info,
                                 LLT SrcTy,
                                 LLT PartTy) {
  assert(DstRegs.size() > 1 && "Nothing to unpack");

  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned PartSize = PartTy.getSizeInBits();

  if (SrcTy.isVector() && !PartTy.isVector() &&
      PartSize > SrcTy.getElementType().getSizeInBits()) {
    // Vector was scalarized, and the elements extended.
    auto UnmergeToEltTy = B.buildUnmerge(SrcTy.getElementType(),
                                         SrcReg);
    for (int i = 0, e = DstRegs.size(); i != e; ++i)
      B.buildAnyExt(DstRegs[i], UnmergeToEltTy.getReg(i));
    return;
  }

  if (SrcSize % PartSize == 0) {
    B.buildUnmerge(DstRegs, SrcReg);
    return;
  }

  const int NumRoundedParts = (SrcSize + PartSize - 1) / PartSize;

  LLT BigTy = getMultipleType(PartTy, NumRoundedParts);
  auto ImpDef = B.buildUndef(BigTy);

  auto Big = B.buildInsert(BigTy, ImpDef.getReg(0), SrcReg, 0).getReg(0);

  int64_t Offset = 0;
  for (unsigned i = 0, e = DstRegs.size(); i != e; ++i, Offset += PartSize)
    B.buildExtract(DstRegs[i], Big, Offset);
}

/// Lower the return value for the already existing \p Ret. This assumes that
/// \p B's insertion point is correct.
bool AMDGPUCallLowering::lowerReturnVal(MachineIRBuilder &B,
                                        const Value *Val, ArrayRef<Register> VRegs,
                                        MachineInstrBuilder &Ret) const {
  if (!Val)
    return true;

  auto &MF = B.getMF();
  const auto &F = MF.getFunction();
  const DataLayout &DL = MF.getDataLayout();
  MachineRegisterInfo *MRI = B.getMRI();

  CallingConv::ID CC = F.getCallingConv();
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();

  ArgInfo OrigRetInfo(VRegs, Val->getType());
  setArgFlags(OrigRetInfo, AttributeList::ReturnIndex, DL, F);
  SmallVector<ArgInfo, 4> SplitRetInfos;

  splitToValueTypes(
    B, OrigRetInfo, AttributeList::ReturnIndex, SplitRetInfos, DL, CC,
    [&](ArrayRef<Register> Regs, Register SrcReg, LLT LLTy, LLT PartLLT,
        int VTSplitIdx) {
      unpackRegsToOrigType(B, Regs, SrcReg,
                           SplitRetInfos[VTSplitIdx],
                           LLTy, PartLLT);
    });

  CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg());
  OutgoingValueHandler RetHandler(B, *MRI, Ret, AssignFn);
  return handleAssignments(B, SplitRetInfos, RetHandler);
}

bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B,
                                     const Value *Val,
                                     ArrayRef<Register> VRegs) const {

  MachineFunction &MF = B.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MFI->setIfReturnsVoid(!Val);

  assert(!Val == VRegs.empty() && "Return value without a vreg");

  CallingConv::ID CC = B.getMF().getFunction().getCallingConv();
  const bool IsShader = AMDGPU::isShader(CC);
  const bool IsWaveEnd = (IsShader && MFI->returnsVoid()) ||
                         AMDGPU::isKernel(CC);
  if (IsWaveEnd) {
    B.buildInstr(AMDGPU::S_ENDPGM)
      .addImm(0);
    return true;
  }

  auto const &ST = MF.getSubtarget<GCNSubtarget>();

  unsigned ReturnOpc =
      IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::S_SETPC_B64_return;

  auto Ret = B.buildInstrNoInsert(ReturnOpc);
  Register ReturnAddrVReg;
  if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
    ReturnAddrVReg = MRI.createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass);
    Ret.addUse(ReturnAddrVReg);
  }

  if (!lowerReturnVal(B, Val, VRegs, Ret))
    return false;

  if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
    const SIRegisterInfo *TRI = ST.getRegisterInfo();
    Register LiveInReturn = MF.addLiveIn(TRI->getReturnAddressReg(MF),
                                         &AMDGPU::SGPR_64RegClass);
    B.buildCopy(ReturnAddrVReg, LiveInReturn);
  }

  // TODO: Handle CalleeSavedRegsViaCopy.

  B.insertInstr(Ret);
  return true;
}

Register AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &B,
                                               Type *ParamTy,
                                               uint64_t Offset) const {

  MachineFunction &MF = B.getMF();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  LLT PtrType = getLLTForType(*PtrTy, DL);
  Register KernArgSegmentPtr =
    MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);

  auto OffsetReg = B.buildConstant(LLT::scalar(64), Offset);

  return B.buildPtrAdd(PtrType, KernArgSegmentVReg, OffsetReg).getReg(0);
}

void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &B, Type *ParamTy,
                                        uint64_t Offset, Align Alignment,
                                        Register DstReg) const {
  MachineFunction &MF = B.getMF();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
  Register PtrReg = lowerParameterPtr(B, ParamTy, Offset);

  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo,
      MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
          MachineMemOperand::MOInvariant,
      TypeSize, Alignment);

  B.buildLoad(DstReg, PtrReg, *MMO);
}

// Allocate special inputs passed in user SGPRs.
static void allocateHSAUserSGPRs(CCState &CCInfo,
                                 MachineIRBuilder &B,
                                 MachineFunction &MF,
                                 const SIRegisterInfo &TRI,
                                 SIMachineFunctionInfo &Info) {
  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
  if (Info.hasPrivateSegmentBuffer()) {
    Register PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
    CCInfo.AllocateReg(PrivateSegmentBufferReg);
  }

  if (Info.hasDispatchPtr()) {
    Register DispatchPtrReg = Info.addDispatchPtr(TRI);
    MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchPtrReg);
  }

  if (Info.hasQueuePtr()) {
    Register QueuePtrReg = Info.addQueuePtr(TRI);
    MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(QueuePtrReg);
  }

  if (Info.hasKernargSegmentPtr()) {
    MachineRegisterInfo &MRI = MF.getRegInfo();
    Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
    const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
    Register VReg = MRI.createGenericVirtualRegister(P4);
    MRI.addLiveIn(InputPtrReg, VReg);
    B.getMBB().addLiveIn(InputPtrReg);
    B.buildCopy(VReg, InputPtrReg);
    CCInfo.AllocateReg(InputPtrReg);
  }

  if (Info.hasDispatchID()) {
    Register DispatchIDReg = Info.addDispatchID(TRI);
    MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchIDReg);
  }

  if (Info.hasFlatScratchInit()) {
    Register FlatScratchInitReg = Info.addFlatScratchInit(TRI);
    MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(FlatScratchInitReg);
  }

  // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
  // these from the dispatch pointer.
}

bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
    MachineIRBuilder &B, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  MachineFunction &MF = B.getMF();
  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();

  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

  allocateHSAUserSGPRs(CCInfo, B, MF, *TRI, *Info);

  unsigned i = 0;
  const Align KernArgBaseAlign(16);
  const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
  uint64_t ExplicitArgOffset = 0;

  // TODO: Align down to dword alignment and extract bits for extending loads.
  for (auto &Arg : F.args()) {
    Type *ArgTy = Arg.getType();
    unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
    if (AllocSize == 0)
      continue;

    unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);

    uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
    ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;

    ArrayRef<Register> OrigArgRegs = VRegs[i];
    Register ArgReg =
        OrigArgRegs.size() == 1
            ? OrigArgRegs[0]
            : MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL));

    Align Alignment = commonAlignment(KernArgBaseAlign, ArgOffset);
    lowerParameter(B, ArgTy, ArgOffset, Alignment, ArgReg);
    if (OrigArgRegs.size() > 1)
      unpackRegs(OrigArgRegs, ArgReg, ArgTy, B);
    ++i;
  }

  TLI.allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
  TLI.allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
  return true;
}

/// Pack values \p SrcRegs to cover the vector type result \p DstRegs.
static MachineInstrBuilder mergeVectorRegsToResultRegs(
    MachineIRBuilder &B, ArrayRef<Register> DstRegs, ArrayRef<Register> SrcRegs) {
  MachineRegisterInfo &MRI = *B.getMRI();
  LLT LLTy = MRI.getType(DstRegs[0]);
  LLT PartLLT = MRI.getType(SrcRegs[0]);

  // Deal with v3s16 split into v2s16
  LLT LCMTy = getLCMType(LLTy, PartLLT);
  if (LCMTy == LLTy) {
    // Common case where no padding is needed.
    assert(DstRegs.size() == 1);
    return B.buildConcatVectors(DstRegs[0], SrcRegs);
  }

  const int NumWide = LCMTy.getSizeInBits() / PartLLT.getSizeInBits();
  Register Undef = B.buildUndef(PartLLT).getReg(0);

  // Build vector of undefs.
  SmallVector<Register, 8> WidenedSrcs(NumWide, Undef);

  // Replace the first sources with the real registers.
  std::copy(SrcRegs.begin(), SrcRegs.end(), WidenedSrcs.begin());

  auto Widened = B.buildConcatVectors(LCMTy, WidenedSrcs);
  int NumDst = LCMTy.getSizeInBits() / LLTy.getSizeInBits();

  SmallVector<Register, 8> PadDstRegs(NumDst);
  std::copy(DstRegs.begin(), DstRegs.end(), PadDstRegs.begin());

  // Create the excess dead defs for the unmerge.
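  // The unmerge below must define NumDst results, so pad the destination list
  // with fresh registers for the results nothing actually uses.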
  for (int I = DstRegs.size(); I != NumDst; ++I)
    PadDstRegs[I] = MRI.createGenericVirtualRegister(LLTy);

  return B.buildUnmerge(PadDstRegs, Widened);
}

// TODO: Move this to generic code
static void packSplitRegsToOrigType(MachineIRBuilder &B,
                                    ArrayRef<Register> OrigRegs,
                                    ArrayRef<Register> Regs,
                                    LLT LLTy,
                                    LLT PartLLT) {
  MachineRegisterInfo &MRI = *B.getMRI();

  if (!LLTy.isVector() && !PartLLT.isVector()) {
    assert(OrigRegs.size() == 1);
    LLT OrigTy = MRI.getType(OrigRegs[0]);

    unsigned SrcSize = PartLLT.getSizeInBits() * Regs.size();
    if (SrcSize == OrigTy.getSizeInBits())
      B.buildMerge(OrigRegs[0], Regs);
    else {
      auto Widened = B.buildMerge(LLT::scalar(SrcSize), Regs);
      B.buildTrunc(OrigRegs[0], Widened);
    }

    return;
  }

  if (LLTy.isVector() && PartLLT.isVector()) {
    assert(OrigRegs.size() == 1);
    assert(LLTy.getElementType() == PartLLT.getElementType());
    mergeVectorRegsToResultRegs(B, OrigRegs, Regs);
    return;
  }

  assert(LLTy.isVector() && !PartLLT.isVector());

  LLT DstEltTy = LLTy.getElementType();

  // Pointer information was discarded. We'll need to coerce some register types
  // to avoid violating type constraints.
  LLT RealDstEltTy = MRI.getType(OrigRegs[0]).getElementType();

  assert(DstEltTy.getSizeInBits() == RealDstEltTy.getSizeInBits());

  if (DstEltTy == PartLLT) {
    // Vector was trivially scalarized.

    if (RealDstEltTy.isPointer()) {
      for (Register Reg : Regs)
        MRI.setType(Reg, RealDstEltTy);
    }

    B.buildBuildVector(OrigRegs[0], Regs);
  } else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) {
    // Deal with vector with 64-bit elements decomposed to 32-bit
    // registers. Need to create intermediate 64-bit elements.
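    // e.g. a <2 x s64> value passed as four s32 parts: merge each pair of s32
    // registers back into an s64 element, then build the final vector.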
    SmallVector<Register, 8> EltMerges;
    int PartsPerElt = DstEltTy.getSizeInBits() / PartLLT.getSizeInBits();

    assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0);

    for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) {
      auto Merge = B.buildMerge(RealDstEltTy, Regs.take_front(PartsPerElt));
      // Fix the type in case this is really a vector of pointers.
      MRI.setType(Merge.getReg(0), RealDstEltTy);
      EltMerges.push_back(Merge.getReg(0));
      Regs = Regs.drop_front(PartsPerElt);
    }

    B.buildBuildVector(OrigRegs[0], EltMerges);
  } else {
    // Vector was split, and elements promoted to a wider type.
    LLT BVType = LLT::vector(LLTy.getNumElements(), PartLLT);
    auto BV = B.buildBuildVector(BVType, Regs);
    B.buildTrunc(OrigRegs[0], BV);
  }
}

bool AMDGPUCallLowering::lowerFormalArguments(
    MachineIRBuilder &B, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  CallingConv::ID CC = F.getCallingConv();

  // The infrastructure for normal calling convention lowering is essentially
  // useless for kernels. We want to avoid any kind of legalization or argument
  // splitting.
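  // Kernel arguments are instead loaded directly out of the kernarg segment by
  // lowerFormalArgumentsKernel.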
  if (CC == CallingConv::AMDGPU_KERNEL)
    return lowerFormalArgumentsKernel(B, F, VRegs);

  const bool IsShader = AMDGPU::isShader(CC);
  const bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CC);

  MachineFunction &MF = B.getMF();
  MachineBasicBlock &MBB = B.getMBB();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const DataLayout &DL = F.getParent()->getDataLayout();


  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());

  if (!IsEntryFunc) {
    Register ReturnAddrReg = TRI->getReturnAddressReg(MF);
    Register LiveInReturn = MF.addLiveIn(ReturnAddrReg,
                                         &AMDGPU::SGPR_64RegClass);
    MBB.addLiveIn(ReturnAddrReg);
    B.buildCopy(LiveInReturn, ReturnAddrReg);
  }

  if (Info->hasImplicitBufferPtr()) {
    Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
    MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(ImplicitBufferPtrReg);
  }


  SmallVector<ArgInfo, 32> SplitArgs;
  unsigned Idx = 0;
  unsigned PSInputNum = 0;

  for (auto &Arg : F.args()) {
    if (DL.getTypeStoreSize(Arg.getType()) == 0)
      continue;

    const bool InReg = Arg.hasAttribute(Attribute::InReg);

    // SGPR arguments to functions not implemented.
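    // Returning false makes GlobalISel bail on the whole function (typically
    // falling back to SelectionDAG when the fallback path is enabled).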
    if (!IsShader && InReg)
      return false;

    if (Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::Nest))
      return false;

    if (CC == CallingConv::AMDGPU_PS && !InReg && PSInputNum <= 15) {
      const bool ArgUsed = !Arg.use_empty();
      bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum);

      if (!SkipArg) {
        Info->markPSInputAllocated(PSInputNum);
        if (ArgUsed)
          Info->markPSInputEnabled(PSInputNum);
      }

      ++PSInputNum;

      if (SkipArg) {
        for (int I = 0, E = VRegs[Idx].size(); I != E; ++I)
          B.buildUndef(VRegs[Idx][I]);

        ++Idx;
        continue;
      }
    }

    ArgInfo OrigArg(VRegs[Idx], Arg.getType());
    const unsigned OrigArgIdx = Idx + AttributeList::FirstArgIndex;
    setArgFlags(OrigArg, OrigArgIdx, DL, F);

    splitToValueTypes(
      B, OrigArg, OrigArgIdx, SplitArgs, DL, CC,
      // FIXME: We should probably be passing multiple registers to
      // handleAssignments to do this
      [&](ArrayRef<Register> Regs, Register DstReg,
          LLT LLTy, LLT PartLLT, int VTSplitIdx) {
        assert(DstReg == VRegs[Idx][VTSplitIdx]);
        packSplitRegsToOrigType(B, VRegs[Idx][VTSplitIdx], Regs,
                                LLTy, PartLLT);
      });

    ++Idx;
  }

  // At least one interpolation mode must be enabled or else the GPU will
  // hang.
  //
  // Check PSInputAddr instead of PSInputEnable. The idea is that if the user
  // set PSInputAddr, the user wants to enable some bits after the compilation
  // based on run-time states. Since we can't know what the final PSInputEna
  // will look like, we shouldn't do anything here and the user should take
  // responsibility for the correct programming.
  //
  // Otherwise, the following restrictions apply:
  // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
  // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
  //   enabled too.
  if (CC == CallingConv::AMDGPU_PS) {
    if ((Info->getPSInputAddr() & 0x7F) == 0 ||
        ((Info->getPSInputAddr() & 0xF) == 0 &&
         Info->isPSInputAllocated(11))) {
      CCInfo.AllocateReg(AMDGPU::VGPR0);
      CCInfo.AllocateReg(AMDGPU::VGPR1);
      Info->markPSInputAllocated(0);
      Info->markPSInputEnabled(0);
    }

    if (Subtarget.isAmdPalOS()) {
      // For isAmdPalOS, the user does not enable some bits after compilation
      // based on run-time states; the register values being generated here are
      // the final ones set in hardware. Therefore we need to apply the
      // workaround to PSInputAddr and PSInputEnable together. (The case where
      // a bit is set in PSInputAddr but not PSInputEnable is where the frontend
      // set up an input arg for a particular interpolation mode, but nothing
      // uses that input arg. Really we should have an earlier pass that removes
      // such an arg.)
      unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
      if ((PsInputBits & 0x7F) == 0 ||
          ((PsInputBits & 0xF) == 0 &&
           (PsInputBits >> 11 & 1)))
        Info->markPSInputEnabled(
          countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined));
    }
  }

  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC, F.isVarArg());

  if (!MBB.empty())
    B.setInstr(*MBB.begin());

  if (!IsEntryFunc) {
    // For the fixed ABI, pass workitem IDs in the last argument register.
    if (AMDGPUTargetMachine::EnableFixedFunctionABI)
      TLI.allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);
  }

  FormalArgHandler Handler(B, MRI, AssignFn);
  if (!handleAssignments(CCInfo, ArgLocs, B, SplitArgs, Handler))
    return false;

  if (!IsEntryFunc && !AMDGPUTargetMachine::EnableFixedFunctionABI) {
    // Special inputs come after user arguments.
    TLI.allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info);
  }

  // Start adding system SGPRs.
  if (IsEntryFunc) {
    TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsShader);
  } else {
    CCInfo.AllocateReg(Info->getScratchRSrcReg());
    TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
  }

  // Move back to the end of the basic block.
  B.setMBB(MBB);

  return true;
}