//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUCallLowering.h"
#include "AMDGPU.h"
#include "AMDGPUISelLowering.h"
#include "AMDGPUSubtarget.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

using namespace llvm;

AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
  : CallLowering(&TLI) {
}

bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                     const Value *Val,
                                     ArrayRef<unsigned> VRegs) const {
  // FIXME: Add support for non-void returns.
  if (Val)
    return false;

  MIRBuilder.buildInstr(AMDGPU::S_ENDPGM);
  return true;
}

// Build a pointer to the kernel argument at the given byte offset: a G_GEP of
// the preloaded kernarg segment pointer, which points into the read-only
// constant address space.
unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
                                               Type *ParamTy,
                                               uint64_t Offset) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  LLT PtrType = getLLTForType(*PtrTy, DL);
  unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
  unsigned KernArgSegmentPtr =
    MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  unsigned KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);

  unsigned OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
  MIRBuilder.buildConstant(OffsetReg, Offset);

  MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);

  return DstReg;
}

// Load one kernel argument from the kernarg segment into DstReg. The load can
// be marked invariant because kernarg memory is never written during the
// lifetime of the dispatch.
void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
                                        Type *ParamTy, uint64_t Offset,
                                        unsigned Align,
                                        unsigned DstReg) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
  unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);

  MachineMemOperand *MMO =
    MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
                                     MachineMemOperand::MONonTemporal |
                                     MachineMemOperand::MOInvariant,
                            TypeSize, Align);

  MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
}

bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
                                              const Function &F,
                                              ArrayRef<unsigned> VRegs) const {
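  // Entry functions take their inputs from registers that the hardware
  // preloads and from the kernarg segment rather than from a stack, so the
  // lowering here differs substantially from the generic path: the special
  // input registers are reserved up front so that calling convention
  // assignment of the explicit IR arguments cannot reuse them.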
  // AMDGPU_GS and AMDGPU_HS are not supported yet.
  if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
      F.getCallingConv() == CallingConv::AMDGPU_HS)
    return false;

  MachineFunction &MF = MIRBuilder.getMF();
  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
  if (Info->hasPrivateSegmentBuffer()) {
    unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
    CCInfo.AllocateReg(PrivateSegmentBufferReg);
  }

  if (Info->hasDispatchPtr()) {
    unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
    // FIXME: Need to add reg as live-in.
    CCInfo.AllocateReg(DispatchPtrReg);
  }

  if (Info->hasQueuePtr()) {
    unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
    // FIXME: Need to add reg as live-in.
    CCInfo.AllocateReg(QueuePtrReg);
  }

  if (Info->hasKernargSegmentPtr()) {
    unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
    const LLT P2 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
    unsigned VReg = MRI.createGenericVirtualRegister(P2);
    MRI.addLiveIn(InputPtrReg, VReg);
    MIRBuilder.getMBB().addLiveIn(InputPtrReg);
    MIRBuilder.buildCopy(VReg, InputPtrReg);
    CCInfo.AllocateReg(InputPtrReg);
  }

  if (Info->hasDispatchID()) {
    unsigned DispatchIDReg = Info->addDispatchID(*TRI);
    // FIXME: Need to add reg as live-in.
    CCInfo.AllocateReg(DispatchIDReg);
  }

  if (Info->hasFlatScratchInit()) {
    unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
    // FIXME: Need to add reg as live-in.
    CCInfo.AllocateReg(FlatScratchInitReg);
  }

  // The infrastructure for normal calling convention lowering is essentially
  // useless for kernels. We want to avoid any kind of legalization or argument
  // splitting.
  if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) {
    unsigned i = 0;
    const unsigned KernArgBaseAlign = 16;
    const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
    uint64_t ExplicitArgOffset = 0;

    // TODO: Align down to dword alignment and extract bits for extending loads.
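    // Each argument occupies an ABI-aligned slot in the kernarg segment, and
    // ExplicitArgOffset tracks the running offset of those slots. The segment
    // itself is only assumed to be 16-byte aligned, so the alignment usable
    // for each load is the argument's offset folded into that base alignment.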
    for (auto &Arg : F.args()) {
      Type *ArgTy = Arg.getType();
      unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
      if (AllocSize == 0)
        continue;

      unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);

      uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
      ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;

      unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
      ArgOffset = alignTo(ArgOffset, ABIAlign);
      lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, VRegs[i]);
      ++i;
    }

    return true;
  }

  unsigned NumArgs = F.arg_size();
  Function::const_arg_iterator CurOrigArg = F.arg_begin();
  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
  unsigned PSInputNum = 0;
  BitVector Skipped(NumArgs);
  for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
    EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType());

    ArgInfo OrigArg{VRegs[i], CurOrigArg->getType()};
    setArgFlags(OrigArg, i + 1, DL, F);
    OrigArg.Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));

    if (F.getCallingConv() == CallingConv::AMDGPU_PS &&
        !OrigArg.Flags.isInReg() && !OrigArg.Flags.isByVal() &&
        PSInputNum <= 15) {
      if (CurOrigArg->use_empty() && !Info->isPSInputAllocated(PSInputNum)) {
        Skipped.set(i);
        ++PSInputNum;
        continue;
      }

      Info->markPSInputAllocated(PSInputNum);
      if (!CurOrigArg->use_empty())
        Info->markPSInputEnabled(PSInputNum);

      ++PSInputNum;
    }

    CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
                                             /*IsVarArg=*/false);

    // We can only handle simple value types at the moment.
    if (ValEVT.isVector()) {
      if (!ValEVT.isSimple())
        return false;
      EVT ElemVT = ValEVT.getVectorElementType();
      MVT ValVT = ElemVT.getSimpleVT();
      bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full,
                          OrigArg.Flags, CCInfo);

      // Fail if we don't know how to handle this type.
      if (Res)
        return false;
    } else {
      if (!ValEVT.isSimple())
        return false;
      MVT ValVT = ValEVT.getSimpleVT();
      bool Res =
          AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo);

      // Fail if we don't know how to handle this type.
      if (Res)
        return false;
    }
  }

  Function::const_arg_iterator Arg = F.arg_begin();

  if (F.getCallingConv() == CallingConv::AMDGPU_VS ||
      F.getCallingConv() == CallingConv::AMDGPU_PS) {
    // Copy each assigned live-in register into the virtual register of the
    // corresponding IR argument, skipping the PS inputs elided above.
    for (unsigned i = 0, OrigArgIdx = 0;
         OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) {
      if (Skipped.test(OrigArgIdx))
        continue;
      CCValAssign &VA = ArgLocs[i++];
      MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx]);
      MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
      MIRBuilder.buildCopy(VRegs[OrigArgIdx], VA.getLocReg());
    }
    return true;
  }

  return false;
}