10b57cec5SDimitry Andric //===- AMDGPUAnnotateKernelFeaturesPass.cpp -------------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric /// \file This pass adds target attributes to functions which use intrinsics
100b57cec5SDimitry Andric /// which will impact calling convention lowering.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric
140b57cec5SDimitry Andric #include "AMDGPU.h"
15af732203SDimitry Andric #include "GCNSubtarget.h"
160b57cec5SDimitry Andric #include "llvm/Analysis/CallGraph.h"
170b57cec5SDimitry Andric #include "llvm/Analysis/CallGraphSCCPass.h"
180b57cec5SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
19af732203SDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h"
20af732203SDimitry Andric #include "llvm/IR/IntrinsicsR600.h"
210b57cec5SDimitry Andric #include "llvm/Target/TargetMachine.h"
220b57cec5SDimitry Andric
230b57cec5SDimitry Andric #define DEBUG_TYPE "amdgpu-annotate-kernel-features"
240b57cec5SDimitry Andric
250b57cec5SDimitry Andric using namespace llvm;
260b57cec5SDimitry Andric
270b57cec5SDimitry Andric namespace {
// Attributes naming the implicit kernel arguments. These are propagated from
// callees to callers, and conservatively added wholesale to functions whose
// address is taken or which contain an indirect call.
static constexpr StringLiteral ImplicitAttrNames[] = {
    // X ids unnecessarily propagated to kernels.
    "amdgpu-work-item-id-x", "amdgpu-work-item-id-y",
    "amdgpu-work-item-id-z", "amdgpu-work-group-id-x",
    "amdgpu-work-group-id-y", "amdgpu-work-group-id-z",
    "amdgpu-dispatch-ptr", "amdgpu-dispatch-id",
    "amdgpu-queue-ptr", "amdgpu-implicitarg-ptr"};
350b57cec5SDimitry Andric
// Legacy CallGraphSCC pass that tags functions with attributes describing
// which implicit kernel inputs they (or their callees) require, so that
// calling-convention lowering can reserve the corresponding registers.
class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
private:
  // Target machine, set in doInitialization(); used to query the subtarget.
  const TargetMachine *TM = nullptr;
  // Call graph nodes accumulated across runOnSCC() invocations until a node
  // with no references is seen; then processed in reverse by
  // processUniformWorkGroupAttribute().
  SmallVector<CallGraphNode*, 8> NodeList;

  bool addFeatureAttributes(Function &F);
  bool processUniformWorkGroupAttribute();
  bool propagateUniformWorkGroupAttribute(Function &Caller, Function &Callee);

public:
  static char ID;

  AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}

  bool doInitialization(CallGraph &CG) override;
  bool runOnSCC(CallGraphSCC &SCC) override;

  StringRef getPassName() const override {
    return "AMDGPU Annotate Kernel Features";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Only adds attributes; no analyses are invalidated.
    AU.setPreservesAll();
    CallGraphSCCPass::getAnalysisUsage(AU);
  }

  // \returns true if \p CE is an addrspacecast whose source address space
  // requires the queue pointer.
  static bool visitConstantExpr(const ConstantExpr *CE);
  // Walks \p EntryC and all constant operands reachable from it.
  // \returns true if any reached constant requires the queue pointer, or (in
  // a non-entry function) is a DS-address global.
  static bool visitConstantExprsRecursively(
      const Constant *EntryC,
      SmallPtrSet<const Constant *, 8> &ConstantExprVisited, bool IsFunc,
      bool HasApertureRegs);
};
680b57cec5SDimitry Andric
690b57cec5SDimitry Andric } // end anonymous namespace
700b57cec5SDimitry Andric
710b57cec5SDimitry Andric char AMDGPUAnnotateKernelFeatures::ID = 0;
720b57cec5SDimitry Andric
730b57cec5SDimitry Andric char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;
740b57cec5SDimitry Andric
750b57cec5SDimitry Andric INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
760b57cec5SDimitry Andric "Add AMDGPU function attributes", false, false)
770b57cec5SDimitry Andric
780b57cec5SDimitry Andric
790b57cec5SDimitry Andric // The queue ptr is only needed when casting to flat, not from it.
castRequiresQueuePtr(unsigned SrcAS)800b57cec5SDimitry Andric static bool castRequiresQueuePtr(unsigned SrcAS) {
810b57cec5SDimitry Andric return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
820b57cec5SDimitry Andric }
830b57cec5SDimitry Andric
// Instruction-level overload: checks the cast's source address space.
static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
  return castRequiresQueuePtr(ASC->getSrcAddressSpace());
}
870b57cec5SDimitry Andric
isDSAddress(const Constant * C)885ffd83dbSDimitry Andric static bool isDSAddress(const Constant *C) {
895ffd83dbSDimitry Andric const GlobalValue *GV = dyn_cast<GlobalValue>(C);
905ffd83dbSDimitry Andric if (!GV)
915ffd83dbSDimitry Andric return false;
925ffd83dbSDimitry Andric unsigned AS = GV->getAddressSpace();
935ffd83dbSDimitry Andric return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
945ffd83dbSDimitry Andric }
955ffd83dbSDimitry Andric
visitConstantExpr(const ConstantExpr * CE)960b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
970b57cec5SDimitry Andric if (CE->getOpcode() == Instruction::AddrSpaceCast) {
980b57cec5SDimitry Andric unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
990b57cec5SDimitry Andric return castRequiresQueuePtr(SrcAS);
1000b57cec5SDimitry Andric }
1010b57cec5SDimitry Andric
1020b57cec5SDimitry Andric return false;
1030b57cec5SDimitry Andric }
1040b57cec5SDimitry Andric
visitConstantExprsRecursively(const Constant * EntryC,SmallPtrSet<const Constant *,8> & ConstantExprVisited,bool IsFunc,bool HasApertureRegs)1050b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
1060b57cec5SDimitry Andric const Constant *EntryC,
1075ffd83dbSDimitry Andric SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
1085ffd83dbSDimitry Andric bool IsFunc, bool HasApertureRegs) {
1090b57cec5SDimitry Andric
1100b57cec5SDimitry Andric if (!ConstantExprVisited.insert(EntryC).second)
1110b57cec5SDimitry Andric return false;
1120b57cec5SDimitry Andric
1130b57cec5SDimitry Andric SmallVector<const Constant *, 16> Stack;
1140b57cec5SDimitry Andric Stack.push_back(EntryC);
1150b57cec5SDimitry Andric
1160b57cec5SDimitry Andric while (!Stack.empty()) {
1170b57cec5SDimitry Andric const Constant *C = Stack.pop_back_val();
1180b57cec5SDimitry Andric
1195ffd83dbSDimitry Andric // We need to trap on DS globals in non-entry functions.
1205ffd83dbSDimitry Andric if (IsFunc && isDSAddress(C))
1215ffd83dbSDimitry Andric return true;
1225ffd83dbSDimitry Andric
1230b57cec5SDimitry Andric // Check this constant expression.
1240b57cec5SDimitry Andric if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
1255ffd83dbSDimitry Andric if (!HasApertureRegs && visitConstantExpr(CE))
1260b57cec5SDimitry Andric return true;
1270b57cec5SDimitry Andric }
1280b57cec5SDimitry Andric
1290b57cec5SDimitry Andric // Visit all sub-expressions.
1300b57cec5SDimitry Andric for (const Use &U : C->operands()) {
1310b57cec5SDimitry Andric const auto *OpC = dyn_cast<Constant>(U);
1320b57cec5SDimitry Andric if (!OpC)
1330b57cec5SDimitry Andric continue;
1340b57cec5SDimitry Andric
1350b57cec5SDimitry Andric if (!ConstantExprVisited.insert(OpC).second)
1360b57cec5SDimitry Andric continue;
1370b57cec5SDimitry Andric
1380b57cec5SDimitry Andric Stack.push_back(OpC);
1390b57cec5SDimitry Andric }
1400b57cec5SDimitry Andric }
1410b57cec5SDimitry Andric
1420b57cec5SDimitry Andric return false;
1430b57cec5SDimitry Andric }
1440b57cec5SDimitry Andric
// We do not need to note the x workitem or workgroup id because they are always
// initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
//
// Maps an intrinsic ID to the attribute recording its implicit-argument
// requirement; returns "" when the intrinsic imposes none. \p NonKernelOnly
// is set for the x ids (implicitly available in kernels); \p IsQueuePtr is
// set for intrinsics that require the queue pointer.
static StringRef intrinsicToAttrName(Intrinsic::ID ID,
                                     bool &NonKernelOnly,
                                     bool &IsQueuePtr) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-item-id-x";
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-group-id-x";
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return "amdgpu-work-item-id-y";
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return "amdgpu-work-item-id-z";
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return "amdgpu-work-group-id-y";
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return "amdgpu-work-group-id-z";
  case Intrinsic::amdgcn_dispatch_ptr:
    return "amdgpu-dispatch-ptr";
  case Intrinsic::amdgcn_dispatch_id:
    return "amdgpu-dispatch-id";
  case Intrinsic::amdgcn_kernarg_segment_ptr:
    return "amdgpu-kernarg-segment-ptr";
  case Intrinsic::amdgcn_implicitarg_ptr:
    return "amdgpu-implicitarg-ptr";
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    // TODO: Does not require queue ptr on gfx9+
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
    IsQueuePtr = true;
    return "amdgpu-queue-ptr";
  default:
    return "";
  }
}
1920b57cec5SDimitry Andric
handleAttr(Function & Parent,const Function & Callee,StringRef Name)1930b57cec5SDimitry Andric static bool handleAttr(Function &Parent, const Function &Callee,
1940b57cec5SDimitry Andric StringRef Name) {
1950b57cec5SDimitry Andric if (Callee.hasFnAttribute(Name)) {
1960b57cec5SDimitry Andric Parent.addFnAttr(Name);
1970b57cec5SDimitry Andric return true;
1980b57cec5SDimitry Andric }
1990b57cec5SDimitry Andric return false;
2000b57cec5SDimitry Andric }
2010b57cec5SDimitry Andric
copyFeaturesToFunction(Function & Parent,const Function & Callee,bool & NeedQueuePtr)2020b57cec5SDimitry Andric static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
2030b57cec5SDimitry Andric bool &NeedQueuePtr) {
2040b57cec5SDimitry Andric if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
2050b57cec5SDimitry Andric NeedQueuePtr = true;
2060b57cec5SDimitry Andric
207*5f7ddb14SDimitry Andric for (StringRef AttrName : ImplicitAttrNames)
2080b57cec5SDimitry Andric handleAttr(Parent, Callee, AttrName);
2090b57cec5SDimitry Andric }
2100b57cec5SDimitry Andric
processUniformWorkGroupAttribute()2110b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::processUniformWorkGroupAttribute() {
2120b57cec5SDimitry Andric bool Changed = false;
2130b57cec5SDimitry Andric
2140b57cec5SDimitry Andric for (auto *Node : reverse(NodeList)) {
2150b57cec5SDimitry Andric Function *Caller = Node->getFunction();
2160b57cec5SDimitry Andric
2170b57cec5SDimitry Andric for (auto I : *Node) {
2180b57cec5SDimitry Andric Function *Callee = std::get<1>(I)->getFunction();
2190b57cec5SDimitry Andric if (Callee)
2200b57cec5SDimitry Andric Changed = propagateUniformWorkGroupAttribute(*Caller, *Callee);
2210b57cec5SDimitry Andric }
2220b57cec5SDimitry Andric }
2230b57cec5SDimitry Andric
2240b57cec5SDimitry Andric return Changed;
2250b57cec5SDimitry Andric }
2260b57cec5SDimitry Andric
// Pushes the caller's "uniform-work-group-size" attribute down to the callee.
// Externally-defined callees are conservatively forced to "false" (and the
// caller too, if unset). \returns true when any attribute was added.
bool AMDGPUAnnotateKernelFeatures::propagateUniformWorkGroupAttribute(
    Function &Caller, Function &Callee) {

  // Check for externally defined function
  if (!Callee.hasExactDefinition()) {
    Callee.addFnAttr("uniform-work-group-size", "false");
    if (!Caller.hasFnAttribute("uniform-work-group-size"))
      Caller.addFnAttr("uniform-work-group-size", "false");

    // NOTE(review): returns true even when both attributes were already
    // "false", so changes may be over-reported — confirm intent.
    return true;
  }
  // Check if the Caller has the attribute
  if (Caller.hasFnAttribute("uniform-work-group-size")) {
    // Check if the value of the attribute is true
    if (Caller.getFnAttribute("uniform-work-group-size")
        .getValueAsString().equals("true")) {
      // Propagate the attribute to the Callee, if it does not have it
      if (!Callee.hasFnAttribute("uniform-work-group-size")) {
        Callee.addFnAttr("uniform-work-group-size", "true");
        return true;
      }
    } else {
      // Caller is non-uniform, so the callee must be treated as non-uniform.
      Callee.addFnAttr("uniform-work-group-size", "false");
      return true;
    }
  } else {
    // If the attribute is absent, set it as false
    Caller.addFnAttr("uniform-work-group-size", "false");
    Callee.addFnAttr("uniform-work-group-size", "false");
    return true;
  }
  return false;
}
2600b57cec5SDimitry Andric
// Scans every instruction of \p F and adds the attributes describing which
// implicit inputs (workitem/workgroup ids, dispatch/queue/implicitarg
// pointers), stack objects, and calls the function requires.
// \returns true if any attribute was added.
bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
  const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
  // With aperture registers, flat casts do not need the queue pointer.
  bool HasApertureRegs = ST.hasApertureRegs();
  // Shared across the whole function so each constant is walked once.
  SmallPtrSet<const Constant *, 8> ConstantExprVisited;

  bool HaveStackObjects = false;
  bool Changed = false;
  bool NeedQueuePtr = false;
  bool HaveCall = false;
  bool HasIndirectCall = false;
  // Non-entry ("device") function vs. kernel entry point.
  bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());
  CallingConv::ID CC = F.getCallingConv();
  // amdgpu_gfx functions do not receive the full implicit-argument set.
  bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);

  // If this function hasAddressTaken() = true
  // then add all attributes corresponding to the implicit args.
  if (CallingConvSupportsAllImplicits &&
      F.hasAddressTaken(nullptr, true, true, true)) {
    for (StringRef AttrName : ImplicitAttrNames) {
      F.addFnAttr(AttrName);
    }
    Changed = true;
  }

  for (BasicBlock &BB : F) {
    for (Instruction &I : BB) {
      // Any alloca means the function has stack objects.
      if (isa<AllocaInst>(I)) {
        HaveStackObjects = true;
        continue;
      }

      if (auto *CB = dyn_cast<CallBase>(&I)) {
        const Function *Callee =
            dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());

        // Note the occurrence of indirect call.
        if (!Callee) {
          // Inline asm is not a real call; anything else unresolvable is
          // treated as an indirect call.
          if (!CB->isInlineAsm()) {
            HasIndirectCall = true;
            HaveCall = true;
          }
          continue;
        }

        Intrinsic::ID IID = Callee->getIntrinsicID();
        if (IID == Intrinsic::not_intrinsic) {
          // Real call: inherit whatever the callee already requires.
          HaveCall = true;
          copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
          Changed = true;
        } else {
          bool NonKernelOnly = false;

          if (!IsFunc && IID == Intrinsic::amdgcn_kernarg_segment_ptr) {
            // Kernels get the kernarg segment pointer attribute directly.
            F.addFnAttr("amdgpu-kernarg-segment-ptr");
          } else {
            StringRef AttrName = intrinsicToAttrName(IID, NonKernelOnly,
                                                     NeedQueuePtr);
            // NonKernelOnly attrs are skipped for kernels (always present).
            if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) {
              F.addFnAttr(AttrName);
              Changed = true;
            }
          }
        }
      }

      // Once the queue ptr is known-needed (or provably never needed for a
      // kernel with aperture registers), skip the cast/constant scans.
      if (NeedQueuePtr || (!IsFunc && HasApertureRegs))
        continue;

      if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
        if (!HasApertureRegs && castRequiresQueuePtr(ASC)) {
          NeedQueuePtr = true;
          continue;
        }
      }

      // Constant operands may hide addrspacecasts / DS addresses.
      for (const Use &U : I.operands()) {
        const auto *OpC = dyn_cast<Constant>(U);
        if (!OpC)
          continue;

        if (visitConstantExprsRecursively(OpC, ConstantExprVisited, IsFunc,
                                          HasApertureRegs)) {
          NeedQueuePtr = true;
          break;
        }
      }
    }
  }

  if (NeedQueuePtr) {
    F.addFnAttr("amdgpu-queue-ptr");
    Changed = true;
  }

  // TODO: We could refine this to captured pointers that could possibly be
  // accessed by flat instructions. For now this is mostly a poor way of
  // estimating whether there are calls before argument lowering.
  if (!IsFunc && HaveCall) {
    F.addFnAttr("amdgpu-calls");
    Changed = true;
  }

  if (HaveStackObjects) {
    F.addFnAttr("amdgpu-stack-objects");
    Changed = true;
  }

  // This pass cannot copy attributes from callees to callers
  // if there is an indirect call and in thus such cases,
  // hasAddressTaken() would be false for kernels and functions
  // making an indirect call (if they are themselves not indirectly called).
  // We must tag all such kernels/functions with all implicits attributes
  // for correctness.
  // e.g.
  // 1. Kernel K1 makes an indirect call to function F1.
  //    Without detecting an indirect call in K1, this pass will not
  //    add all implicit args to K1 (which is incorrect).
  // 2. Kernel K1 makes direct call to F1 which makes indirect call to function
  //    F2.
  //    Without detecting an indirect call in F1 (whose hasAddressTaken() is
  //    false), the pass will not add all implicit args to F1 (which is
  //    essential for correctness).
  if (CallingConvSupportsAllImplicits && HasIndirectCall) {
    for (StringRef AttrName : ImplicitAttrNames) {
      F.addFnAttr(AttrName);
    }
    Changed = true;
  }

  return Changed;
}
3920b57cec5SDimitry Andric
runOnSCC(CallGraphSCC & SCC)3930b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
3940b57cec5SDimitry Andric bool Changed = false;
3950b57cec5SDimitry Andric
3960b57cec5SDimitry Andric for (CallGraphNode *I : SCC) {
3970b57cec5SDimitry Andric // Build a list of CallGraphNodes from most number of uses to least
3980b57cec5SDimitry Andric if (I->getNumReferences())
3990b57cec5SDimitry Andric NodeList.push_back(I);
4000b57cec5SDimitry Andric else {
4010b57cec5SDimitry Andric processUniformWorkGroupAttribute();
4020b57cec5SDimitry Andric NodeList.clear();
4030b57cec5SDimitry Andric }
4040b57cec5SDimitry Andric
4050b57cec5SDimitry Andric Function *F = I->getFunction();
406*5f7ddb14SDimitry Andric // Ignore functions with graphics calling conventions, these are currently
407*5f7ddb14SDimitry Andric // not allowed to have kernel arguments.
408*5f7ddb14SDimitry Andric if (!F || F->isDeclaration() || AMDGPU::isGraphics(F->getCallingConv()))
4090b57cec5SDimitry Andric continue;
410*5f7ddb14SDimitry Andric // Add feature attributes
4110b57cec5SDimitry Andric Changed |= addFeatureAttributes(*F);
4120b57cec5SDimitry Andric }
4130b57cec5SDimitry Andric
4140b57cec5SDimitry Andric return Changed;
4150b57cec5SDimitry Andric }
4160b57cec5SDimitry Andric
doInitialization(CallGraph & CG)4170b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
4180b57cec5SDimitry Andric auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
4190b57cec5SDimitry Andric if (!TPC)
4200b57cec5SDimitry Andric report_fatal_error("TargetMachine is required");
4210b57cec5SDimitry Andric
4220b57cec5SDimitry Andric TM = &TPC->getTM<TargetMachine>();
4230b57cec5SDimitry Andric return false;
4240b57cec5SDimitry Andric }
4250b57cec5SDimitry Andric
// Factory used by the AMDGPU target's pass pipeline setup.
Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
  return new AMDGPUAnnotateKernelFeatures();
}
429